From f538416fb3bcb7a674d8b435e46edec03f819ae5 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 20 Jan 2024 22:37:02 +0100 Subject: [PATCH 0001/2895] :arrow_up: Update docs version mudler/LocalAI (#1619) Signed-off-by: GitHub Co-authored-by: mudler --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index bc05c844..87437f7b 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.5.1" + "version": "v2.6.0" } From 6a88b030eaeee79edcba056009eeeb8ce26b622a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 20 Jan 2024 23:34:46 +0100 Subject: [PATCH 0002/2895] :arrow_up: Update ggerganov/llama.cpp (#1620) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1618b759..047b5f88 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=381ee195721d8e747ee31a60c0751822b3072f02 +CPPLLAMA_VERSION?=97c1549808d2742d37584a3c9df28154bdf34417 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From eaf85a30f95d9cd0ded04a7957b4be0b8d8e7a2f Mon Sep 17 00:00:00 2001 From: Sebastian Date: Sun, 21 Jan 2024 09:56:14 +0100 Subject: [PATCH 0003/2895] fix(llama.cpp): Enable parallel requests (#1616) integrate changes from llama.cpp Signed-off-by: Sebastian --- backend/cpp/llama/grpc-server.cpp | 95 ++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 33 deletions(-) diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 04c6586c..3bbf7ce0 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -158,8 +158,8 @@ static std::vector base64_decode(const std::string & encoded_string) // enum task_type { - COMPLETION_TASK, - CANCEL_TASK + TASK_TYPE_COMPLETION, + TASK_TYPE_CANCEL, }; struct task_server { @@ -458,8 +458,12 @@ struct llama_client_slot } bool has_budget(gpt_params &global_params) { + if (params.n_predict == -1 && global_params.n_predict == -1) + { + return true; // limitless + } n_remaining = -1; - if(params.n_predict != -1) + if (params.n_predict != -1) { n_remaining = params.n_predict - n_decoded; } @@ -467,7 +471,7 @@ struct llama_client_slot { n_remaining = global_params.n_predict - n_decoded; } - return n_remaining > 0 || n_remaining == -1; // no budget || limitless + return n_remaining > 0; // no budget } bool available() const { @@ -1113,7 +1117,7 @@ struct llama_server_context } // check the limits - if (slot.n_decoded > 2 && slot.has_next_token && !slot.has_budget(params)) + if (slot.n_decoded > 0 && slot.has_next_token && !slot.has_budget(params)) { slot.stopped_limit = true; slot.has_next_token = false; @@ -1177,8 +1181,9 @@ struct llama_server_context return slot.images.size() > 0; } - void send_error(task_server& task, std::string error) + void send_error(task_server& task, const std::string &error) { + LOG_TEE("task %i - error: %s\n", task.id, error.c_str()); std::unique_lock lock(mutex_results); task_result res; res.id = task.id; @@ -1276,7 +1281,7 @@ struct llama_server_context { std::vector probs_output = {}; const std::vector to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false); - size_t probs_pos = 
std::min(slot.sent_token_probs_index, slot.generated_token_probs.size()); + size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size()); size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size()); if (probs_pos < probs_stop_pos) { @@ -1336,7 +1341,7 @@ struct llama_server_context { probs = std::vector( slot.generated_token_probs.begin(), - slot.generated_token_probs.begin() + slot.sent_token_probs_index); + slot.generated_token_probs.end()); } res.result_json["completion_probabilities"] = probs_vector_to_json(ctx, probs); } @@ -1346,6 +1351,11 @@ struct llama_server_context res.result_json["oaicompat_token_ctr"] = slot.n_decoded; res.result_json["model"] = slot.oaicompat_model; } + queue_results.push_back(res); + condition_results.notify_all(); + + // done with results, unlock + lock.unlock(); // parent multitask, if any, needs to be updated if (slot.multitask_id != -1) @@ -1353,8 +1363,6 @@ struct llama_server_context update_multi_task(slot.multitask_id, slot.task_id, res); } - queue_results.push_back(res); - condition_results.notify_all(); } void send_embedding(llama_client_slot &slot) @@ -1399,11 +1407,11 @@ struct llama_server_context task.data = std::move(data); task.infill_mode = infill; task.embedding_mode = embedding; - task.type = COMPLETION_TASK; + task.type = TASK_TYPE_COMPLETION; task.multitask_id = multitask_id; // when a completion task's prompt array is not a singleton, we split it into multiple requests - if (task.data.at("prompt").size() > 1) + if (task.data.count("prompt") && task.data.at("prompt").size() > 1) { lock.unlock(); // entering new func scope return split_multiprompt_task(task); @@ -1521,7 +1529,7 @@ struct llama_server_context std::unique_lock lock(mutex_tasks); task_server task; task.id = id_gen++; - task.type = CANCEL_TASK; + task.type = TASK_TYPE_CANCEL; task.target_id = task_id; queue_tasks.push_back(task); condition_tasks.notify_one(); @@ -1551,32 +1559,41 @@ struct llama_server_context void process_tasks() { std::unique_lock lock(mutex_tasks); + std::vector deferred_tasks; while (!queue_tasks.empty()) { task_server task = queue_tasks.front(); queue_tasks.erase(queue_tasks.begin()); switch (task.type) { - case COMPLETION_TASK: { + case TASK_TYPE_COMPLETION: { llama_client_slot *slot = get_slot(json_value(task.data, "slot_id", -1)); if (slot == nullptr) { - LOG_TEE("slot unavailable\n"); - // send error result - send_error(task, "slot unavailable"); - return; + // if no slot is available, we defer this task for processing later + deferred_tasks.push_back(task); + break; } if (task.data.contains("system_prompt")) { + if (!all_slots_are_idle) { + send_error(task, "system prompt can only be updated when all slots are idle"); + break; + } process_system_prompt_data(task.data["system_prompt"]); + // reset cache_tokens for all slots + for (llama_client_slot &slot : slots) + { + slot.cache_tokens.clear(); + } } slot->reset(); - slot->infill = task.infill_mode; - slot->embedding = task.embedding_mode; - slot->task_id = task.id; + slot->infill = task.infill_mode; + slot->embedding = task.embedding_mode; + slot->task_id = task.id; slot->multitask_id = task.multitask_id; if (!launch_slot_with_data(slot, task.data)) @@ -1586,7 +1603,7 @@ struct llama_server_context break; } } break; - case CANCEL_TASK: { // release slot linked with the task id + case TASK_TYPE_CANCEL: { // release slot linked with the task id for (auto & slot : slots) { if (slot.task_id == task.target_id) @@ 
-1599,7 +1616,14 @@ struct llama_server_context } } + // add all the deferred tasks back the the queue + for (task_server &task : deferred_tasks) + { + queue_tasks.push_back(task); + } + // remove finished multitasks from the queue of multitasks, and add the corresponding result to the result queue + std::vector agg_results; auto queue_iterator = queue_multitasks.begin(); while (queue_iterator != queue_multitasks.end()) { @@ -1620,8 +1644,7 @@ struct llama_server_context } aggregate_result.result_json = json{ "results", result_jsons }; - std::lock_guard lock(mutex_results); - queue_results.push_back(aggregate_result); + agg_results.push_back(aggregate_result); condition_results.notify_all(); queue_iterator = queue_multitasks.erase(queue_iterator); @@ -1631,14 +1654,19 @@ struct llama_server_context ++queue_iterator; } } + // done with tasks, unlock + lock.unlock(); + + // copy aggregate results of complete multi-tasks to the results queue + std::lock_guard lock_results(mutex_results); + queue_results.insert(queue_results.end(), agg_results.begin(), agg_results.end()); } bool update_slots() { // attend tasks process_tasks(); - // update the system prompt wait until all slots are idle state - if (system_need_update && all_slots_are_idle) + if (system_need_update) { LOG_TEE("updating system prompt\n"); update_system_prompt(); @@ -1714,7 +1742,6 @@ struct llama_server_context llama_batch_add(batch, slot.sampled, system_tokens.size() + slot.n_past, { slot.id }, true); - slot.n_decoded += 1; slot.n_past += 1; } @@ -1729,7 +1756,8 @@ struct llama_server_context const bool has_prompt = slot.prompt.is_array() || (slot.prompt.is_string() && !slot.prompt.get().empty()) || !slot.images.empty(); // empty prompt passed -> release the slot and send empty response - if (slot.state == IDLE && slot.command == LOAD_PROMPT && !has_prompt) + // note: infill mode allows empty prompt + if (slot.state == IDLE && slot.command == LOAD_PROMPT && !has_prompt && !slot.infill) { slot.release(); slot.print_timings(); @@ -1832,7 +1860,7 @@ struct llama_server_context slot.cache_tokens = prompt_tokens; - if (slot.n_past == slot.num_prompt_tokens) + if (slot.n_past == slot.num_prompt_tokens && slot.n_past > 0) { // we have to evaluate at least 1 token to generate logits. LOG_TEE("slot %d : we have to evaluate at least 1 token to generate logits\n", slot.id); @@ -1932,6 +1960,7 @@ struct llama_server_context llama_sampling_accept(slot.ctx_sampling, ctx, id, true); + slot.n_decoded += 1; if (slot.n_decoded == 1) { slot.t_start_genereration = ggml_time_us(); @@ -2023,7 +2052,7 @@ json oaicompat_completion_params_parse( // // https://platform.openai.com/docs/api-reference/chat/create llama_sampling_params default_sparams; - llama_params["model"] = json_value(body, "model", std::string("uknown")); + llama_params["model"] = json_value(body, "model", std::string("unknown")); llama_params["prompt"] = format_chatml(body["messages"]); // OpenAI 'messages' to llama.cpp 'prompt' llama_params["cache_prompt"] = json_value(body, "cache_prompt", false); llama_params["temperature"] = json_value(body, "temperature", 0.0); @@ -2095,8 +2124,8 @@ static json format_final_response_oaicompat(const json &request, const task_resu {"object", streaming ? 
"chat.completion.chunk" : "chat.completion"}, {"usage", json{{"completion_tokens", num_tokens_predicted}, - {"prompt_tokens", num_prompt_tokens}, - {"total_tokens", num_tokens_predicted + num_prompt_tokens}}}, + {"prompt_tokens", num_prompt_tokens}, + {"total_tokens", num_tokens_predicted + num_prompt_tokens}}}, {"id", gen_chatcmplid()}}; if (server_verbose) { @@ -2439,7 +2468,7 @@ static void params_parse(const backend::ModelOptions* request, } else { params.n_parallel = 1; } - + params.cont_batching = true; // TODO: Add yarn if (!request->tensorsplit().empty()) { From 94261b1717db46d5db3a8b883ca38ed5c08be721 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 21 Jan 2024 10:07:30 +0100 Subject: [PATCH 0004/2895] Update gpt-vision.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/features/gpt-vision.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/docs/content/docs/features/gpt-vision.md b/docs/content/docs/features/gpt-vision.md index 543e5df3..3afcab16 100644 --- a/docs/content/docs/features/gpt-vision.md +++ b/docs/content/docs/features/gpt-vision.md @@ -6,10 +6,6 @@ weight = 14 url = "/features/gpt-vision/" +++ -{{% alert note %}} -Available only on `master` builds -{{% /alert %}} - LocalAI supports understanding images by using [LLaVA](https://llava.hliu.cc/), and implements the [GPT Vision API](https://platform.openai.com/docs/guides/vision) from OpenAI. ![llava](https://github.com/mudler/LocalAI/assets/2420543/cb0a0897-3b58-4350-af66-e6f4387b58d3) @@ -28,4 +24,4 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso ### Setup -To setup the LLaVa models, follow the full example in the [configuration examples](https://github.com/mudler/LocalAI/blob/master/examples/configurations/README.md#llava). \ No newline at end of file +To setup the LLaVa models, follow the full example in the [configuration examples](https://github.com/mudler/LocalAI/blob/master/examples/configurations/README.md#llava). 
From 697c769b6422b7084f7c815c5a84bcff50f240f3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 21 Jan 2024 14:59:48 +0100 Subject: [PATCH 0005/2895] fix(llama.cpp): enable cont batching when parallel is set (#1622) Signed-off-by: Ettore Di Giacinto --- backend/cpp/llama/grpc-server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 3bbf7ce0..76a82a33 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -2465,10 +2465,10 @@ static void params_parse(const backend::ModelOptions* request, const char *env_parallel = std::getenv("LLAMACPP_PARALLEL"); if (env_parallel != NULL) { params.n_parallel = std::stoi(env_parallel); + params.cont_batching = true; } else { params.n_parallel = 1; } - params.cont_batching = true; // TODO: Add yarn if (!request->tensorsplit().empty()) { From 47237c7c3cf5823edf41cd84c39da66af8b183c6 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 22 Jan 2024 08:54:06 +0100 Subject: [PATCH 0006/2895] :arrow_up: Update ggerganov/llama.cpp (#1623) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 047b5f88..9dfc417e 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=97c1549808d2742d37584a3c9df28154bdf34417 +CPPLLAMA_VERSION?=05490fad7f7f60ff2bed9ad05cd81b44e82ccde3 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From efe2883c5dec0978460dafba111ddb22215e560d Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 22 Jan 2024 23:22:01 +0100 Subject: [PATCH 0007/2895] :arrow_up: Update ggerganov/llama.cpp (#1626) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9dfc417e..f938eee3 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=05490fad7f7f60ff2bed9ad05cd81b44e82ccde3 +CPPLLAMA_VERSION?=6f9939d119b2d004c264952eb510bd106455531e # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From d5d82ba344738fc44c75b174ffba47421cf635e8 Mon Sep 17 00:00:00 2001 From: coyzeng Date: Tue, 23 Jan 2024 15:56:36 +0800 Subject: [PATCH 0008/2895] feat(grpc): backend SPI pluggable in embedding mode (#1621) * run server * grpc backend embedded support * backend providable --- api/backend/embeddings.go | 2 +- api/backend/llm.go | 2 +- pkg/grpc/backend.go | 46 +++++++++++++++ pkg/grpc/client.go | 11 ---- pkg/grpc/embed.go | 121 ++++++++++++++++++++++++++++++++++++++ pkg/grpc/server.go | 20 +++++++ pkg/model/initializers.go | 6 +- pkg/model/loader.go | 8 +-- 8 files changed, 196 insertions(+), 20 deletions(-) create mode 100644 pkg/grpc/backend.go create mode 100644 pkg/grpc/embed.go diff --git a/api/backend/embeddings.go b/api/backend/embeddings.go index 63f1a831..0cf15fea 100644 --- a/api/backend/embeddings.go +++ b/api/backend/embeddings.go @@ -41,7 +41,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config. 
var fn func() ([]float32, error) switch model := inferenceModel.(type) { - case *grpc.Client: + case grpc.Backend: fn = func() ([]float32, error) { predictOptions := gRPCPredictOpts(c, loader.ModelPath) if len(tokens) > 0 { diff --git a/api/backend/llm.go b/api/backend/llm.go index bd320b61..9e202c53 100644 --- a/api/backend/llm.go +++ b/api/backend/llm.go @@ -31,7 +31,7 @@ func ModelInference(ctx context.Context, s string, images []string, loader *mode grpcOpts := gRPCModelOpts(c) - var inferenceModel *grpc.Client + var inferenceModel grpc.Backend var err error opts := modelOpts(c, o, []model.Option{ diff --git a/pkg/grpc/backend.go b/pkg/grpc/backend.go new file mode 100644 index 00000000..ae8ffc5f --- /dev/null +++ b/pkg/grpc/backend.go @@ -0,0 +1,46 @@ +package grpc + +import ( + "context" + "github.com/go-skynet/LocalAI/api/schema" + pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" + "google.golang.org/grpc" +) + +var embeds = map[string]*embedBackend{} + +func Provide(addr string, llm LLM) { + embeds[addr] = &embedBackend{s: &server{llm: llm}} +} + +func NewClient(address string, parallel bool, wd WatchDog, enableWatchDog bool) Backend { + if bc, ok := embeds[address]; ok { + return bc + } + return NewGrpcClient(address, parallel, wd, enableWatchDog) +} + +func NewGrpcClient(address string, parallel bool, wd WatchDog, enableWatchDog bool) Backend { + if !enableWatchDog { + wd = nil + } + return &Client{ + address: address, + parallel: parallel, + wd: wd, + } +} + +type Backend interface { + IsBusy() bool + HealthCheck(ctx context.Context) (bool, error) + Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.EmbeddingResult, error) + Predict(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.Reply, error) + LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error) + PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error + GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error) + TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) + AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) + TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error) + Status(ctx context.Context) (*pb.StatusResponse, error) +} diff --git a/pkg/grpc/client.go b/pkg/grpc/client.go index 6f7f83bd..5e97ea73 100644 --- a/pkg/grpc/client.go +++ b/pkg/grpc/client.go @@ -27,17 +27,6 @@ type WatchDog interface { UnMark(address string) } -func NewClient(address string, parallel bool, wd WatchDog, enableWatchDog bool) *Client { - if !enableWatchDog { - wd = nil - } - return &Client{ - address: address, - parallel: parallel, - wd: wd, - } -} - func (c *Client) IsBusy() bool { c.Lock() defer c.Unlock() diff --git a/pkg/grpc/embed.go b/pkg/grpc/embed.go new file mode 100644 index 00000000..b9ab551f --- /dev/null +++ b/pkg/grpc/embed.go @@ -0,0 +1,121 @@ +package grpc + +import ( + "context" + "github.com/go-skynet/LocalAI/api/schema" + pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" + "google.golang.org/grpc" + "google.golang.org/grpc/metadata" + "time" +) + +var _ Backend = new(embedBackend) +var _ pb.Backend_PredictStreamServer = new(embedBackendServerStream) + +type embedBackend struct { + s *server +} + +func (e *embedBackend) IsBusy() bool { + return e.s.llm.Busy() 
+} + +func (e *embedBackend) HealthCheck(ctx context.Context) (bool, error) { + return true, nil +} + +func (e *embedBackend) Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.EmbeddingResult, error) { + return e.s.Embedding(ctx, in) +} + +func (e *embedBackend) Predict(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.Reply, error) { + return e.s.Predict(ctx, in) +} + +func (e *embedBackend) LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error) { + return e.s.LoadModel(ctx, in) +} + +func (e *embedBackend) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error { + bs := &embedBackendServerStream{ + ctx: ctx, + fn: f, + } + return e.s.PredictStream(in, bs) +} + +func (e *embedBackend) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error) { + return e.s.GenerateImage(ctx, in) +} + +func (e *embedBackend) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) { + return e.s.TTS(ctx, in) +} + +func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) { + r, err := e.s.AudioTranscription(ctx, in) + if err != nil { + return nil, err + } + tr := &schema.Result{} + for _, s := range r.Segments { + var tks []int + for _, t := range s.Tokens { + tks = append(tks, int(t)) + } + tr.Segments = append(tr.Segments, + schema.Segment{ + Text: s.Text, + Id: int(s.Id), + Start: time.Duration(s.Start), + End: time.Duration(s.End), + Tokens: tks, + }) + } + tr.Text = r.Text + return tr, err +} + +func (e *embedBackend) TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error) { + return e.s.TokenizeString(ctx, in) +} + +func (e *embedBackend) Status(ctx context.Context) (*pb.StatusResponse, error) { + return e.s.Status(ctx, &pb.HealthMessage{}) +} + +type embedBackendServerStream struct { + ctx context.Context + fn func(s []byte) +} + +func (e *embedBackendServerStream) Send(reply *pb.Reply) error { + e.fn(reply.GetMessage()) + return nil +} + +func (e *embedBackendServerStream) SetHeader(md metadata.MD) error { + return nil +} + +func (e *embedBackendServerStream) SendHeader(md metadata.MD) error { + return nil +} + +func (e *embedBackendServerStream) SetTrailer(md metadata.MD) { +} + +func (e *embedBackendServerStream) Context() context.Context { + return e.ctx +} + +func (e *embedBackendServerStream) SendMsg(m any) error { + if x, ok := m.(*pb.Reply); ok { + return e.Send(x) + } + return nil +} + +func (e *embedBackendServerStream) RecvMsg(m any) error { + return nil +} diff --git a/pkg/grpc/server.go b/pkg/grpc/server.go index 24dbe098..07d055d9 100644 --- a/pkg/grpc/server.go +++ b/pkg/grpc/server.go @@ -181,3 +181,23 @@ func StartServer(address string, model LLM) error { return nil } + +func RunServer(address string, model LLM) (func() error, error) { + lis, err := net.Listen("tcp", address) + if err != nil { + return nil, err + } + s := grpc.NewServer() + pb.RegisterBackendServer(s, &server{llm: model}) + log.Printf("gRPC Server listening at %v", lis.Addr()) + if err = s.Serve(lis); err != nil { + return func() error { + return lis.Close() + }, err + } + + return func() error { + s.GracefulStop() + return nil + }, nil +} diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index e17fc27f..e293669a 100644 --- 
a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -166,7 +166,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string } } -func (ml *ModelLoader) resolveAddress(addr ModelAddress, parallel bool) (*grpc.Client, error) { +func (ml *ModelLoader) resolveAddress(addr ModelAddress, parallel bool) (grpc.Backend, error) { if parallel { return addr.GRPC(parallel, ml.wd), nil } @@ -177,7 +177,7 @@ func (ml *ModelLoader) resolveAddress(addr ModelAddress, parallel bool) (*grpc.C return ml.grpcClients[string(addr)], nil } -func (ml *ModelLoader) BackendLoader(opts ...Option) (client *grpc.Client, err error) { +func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err error) { o := NewOptions(opts...) if o.model != "" { @@ -220,7 +220,7 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client *grpc.Client, err e return ml.resolveAddress(addr, o.parallelRequests) } -func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) { +func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) { o := NewOptions(opts...) ml.mu.Lock() diff --git a/pkg/model/loader.go b/pkg/model/loader.go index 686b4298..37c2a603 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -59,7 +59,7 @@ type ModelLoader struct { ModelPath string mu sync.Mutex // TODO: this needs generics - grpcClients map[string]*grpc.Client + grpcClients map[string]grpc.Backend models map[string]ModelAddress grpcProcesses map[string]*process.Process templates map[TemplateType]map[string]*template.Template @@ -68,7 +68,7 @@ type ModelLoader struct { type ModelAddress string -func (m ModelAddress) GRPC(parallel bool, wd *WatchDog) *grpc.Client { +func (m ModelAddress) GRPC(parallel bool, wd *WatchDog) grpc.Backend { enableWD := false if wd != nil { enableWD = true @@ -79,7 +79,7 @@ func (m ModelAddress) GRPC(parallel bool, wd *WatchDog) *grpc.Client { func NewModelLoader(modelPath string) *ModelLoader { nml := &ModelLoader{ ModelPath: modelPath, - grpcClients: make(map[string]*grpc.Client), + grpcClients: make(map[string]grpc.Backend), models: make(map[string]ModelAddress), templates: make(map[TemplateType]map[string]*template.Template), grpcProcesses: make(map[string]*process.Process), @@ -163,7 +163,7 @@ func (ml *ModelLoader) StopModel(modelName string) error { } func (ml *ModelLoader) CheckIsLoaded(s string) ModelAddress { - var client *grpc.Client + var client grpc.Backend if m, ok := ml.models[s]; ok { log.Debug().Msgf("Model already loaded in memory: %s", s) if c, ok := ml.grpcClients[s]; ok { From 5e335eaead984f94591039b0a6c678b71b7298d0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 Jan 2024 23:07:31 +0100 Subject: [PATCH 0009/2895] feat(transformers): support also text generation (#1630) * feat(transformers): support also text generation Signed-off-by: Ettore Di Giacinto * embedded: set seed -1 --------- Signed-off-by: Ettore Di Giacinto --- .../transformers/transformers_server.py | 53 ++++++++++++++++--- embedded/models/dolphin-2.5-mixtral-8x7b.yaml | 1 + embedded/models/llava.yaml | 1 + embedded/models/mistral-openorca.yaml | 1 + embedded/models/mixtral-instruct.yaml | 1 + embedded/models/tinyllama-chat.yaml | 1 + examples/configurations/phi-2.yaml | 1 + 7 files changed, 51 insertions(+), 8 deletions(-) diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index e87e75cf..e6c06bab 100755 --- a/backend/python/transformers/transformers_server.py +++ 
b/backend/python/transformers/transformers_server.py @@ -15,7 +15,7 @@ import backend_pb2_grpc import grpc import torch - +import torch.cuda from transformers import AutoTokenizer, AutoModel _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -70,14 +70,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): try: self.model = AutoModel.from_pretrained(model_name, trust_remote_code=True) # trust_remote_code is needed to use the encode method with embeddings models like jinai-v2 self.tokenizer = AutoTokenizer.from_pretrained(model_name) - - if request.CUDA: + if request.CUDA or torch.cuda.is_available(): try: - # TODO: also tensorflow, make configurable - import torch.cuda - if torch.cuda.is_available(): - print("Loading model", model_name, "to CUDA.", file=sys.stderr) - self.model = self.model.to("cuda") + print("Loading model", model_name, "to CUDA.", file=sys.stderr) + self.model = self.model.to("cuda") except Exception as err: print("Not using CUDA:", err, file=sys.stderr) except Exception as err: @@ -113,6 +109,47 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): print("Embeddings:", sentence_embeddings, file=sys.stderr) return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings) + def Predict(self, request, context): + """ + Generates text based on the given prompt and sampling parameters. + + Args: + request: The predict request. + context: The gRPC context. + + Returns: + backend_pb2.Reply: The predict result. + """ + if request.TopP == 0: + request.TopP = 0.9 + + max_tokens = 200 + if request.Tokens > 0: + max_tokens = request.Tokens + + inputs = self.tokenizer.tokenizer(request.Prompt, return_tensors="pt").input_ids + outputs = self.model.generate(inputs,max_tokens=max_tokens, temperature=request.Temperature, top_p=request.TopP) + + generated_text = self.tokenizer.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0] + # Remove prompt from response if present + if request.Prompt in generated_text: + generated_text = generated_text.replace(request.Prompt, "") + + return backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8')) + + def PredictStream(self, request, context): + """ + Generates text based on the given prompt and sampling parameters, and streams the results. + + Args: + request: The predict stream request. + context: The gRPC context. + + Returns: + backend_pb2.Result: The predict stream result. 
+ """ + yield self.Predict(request, context) + def serve(address): server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) diff --git a/embedded/models/dolphin-2.5-mixtral-8x7b.yaml b/embedded/models/dolphin-2.5-mixtral-8x7b.yaml index dbbeac0e..b6df4799 100644 --- a/embedded/models/dolphin-2.5-mixtral-8x7b.yaml +++ b/embedded/models/dolphin-2.5-mixtral-8x7b.yaml @@ -5,6 +5,7 @@ parameters: temperature: 0.2 top_k: 40 top_p: 0.95 + seed: -1 template: chat_message: | <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} diff --git a/embedded/models/llava.yaml b/embedded/models/llava.yaml index 551eb26b..2e571f21 100644 --- a/embedded/models/llava.yaml +++ b/embedded/models/llava.yaml @@ -17,6 +17,7 @@ parameters: temperature: 0.2 top_k: 40 top_p: 0.95 + seed: -1 template: chat: | diff --git a/embedded/models/mistral-openorca.yaml b/embedded/models/mistral-openorca.yaml index 3a41c766..fbab4e39 100644 --- a/embedded/models/mistral-openorca.yaml +++ b/embedded/models/mistral-openorca.yaml @@ -5,6 +5,7 @@ parameters: temperature: 0.2 top_k: 40 top_p: 0.95 + seed: -1 template: chat_message: | <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} diff --git a/embedded/models/mixtral-instruct.yaml b/embedded/models/mixtral-instruct.yaml index c9c55869..3272557a 100644 --- a/embedded/models/mixtral-instruct.yaml +++ b/embedded/models/mixtral-instruct.yaml @@ -4,6 +4,7 @@ parameters: model: huggingface://TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/mixtral-8x7b-instruct-v0.1.Q2_K.gguf temperature: 0.2 top_k: 40 + seed: -1 top_p: 0.95 template: chat: &chat | diff --git a/embedded/models/tinyllama-chat.yaml b/embedded/models/tinyllama-chat.yaml index 7c9a7579..48c44f9f 100644 --- a/embedded/models/tinyllama-chat.yaml +++ b/embedded/models/tinyllama-chat.yaml @@ -4,6 +4,7 @@ parameters: model: huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q8_0.gguf temperature: 0.2 top_k: 40 + seed: -1 top_p: 0.95 template: chat_message: | diff --git a/examples/configurations/phi-2.yaml b/examples/configurations/phi-2.yaml index 67cef0cc..c09aa6ce 100644 --- a/examples/configurations/phi-2.yaml +++ b/examples/configurations/phi-2.yaml @@ -10,6 +10,7 @@ parameters: temperature: 0.2 top_k: 40 top_p: 0.95 + seed: -1 template: chat: &template | Instruct: {{.Input}} From 7690caf02035c911af8ad3e56766c578833d651e Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 23 Jan 2024 23:07:51 +0100 Subject: [PATCH 0010/2895] :arrow_up: Update ggerganov/llama.cpp (#1632) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f938eee3..bdf44888 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=6f9939d119b2d004c264952eb510bd106455531e +CPPLLAMA_VERSION?=26d607608d794efa56df3bdb6043a2f94c1d632c # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From da3cd8993d58d31c79bb8ec0ebb29919f4feaf92 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 24 Jan 2024 19:50:33 +0100 Subject: [PATCH 0011/2895] :arrow_up: Update docs version mudler/LocalAI (#1631) Signed-off-by: GitHub 
Co-authored-by: mudler --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index 87437f7b..8f66537b 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.6.0" + "version": "v2.6.1" } From 3733250b3c33a2f5446d02205054d16f648a1aa2 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 24 Jan 2024 22:51:59 +0100 Subject: [PATCH 0012/2895] :arrow_up: Update ggerganov/llama.cpp (#1642) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bdf44888..f6d7f0f5 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=26d607608d794efa56df3bdb6043a2f94c1d632c +CPPLLAMA_VERSION?=c9b316c78fba31e65879a2ec91cbafd341b88cce # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From cb7512734d643a62aab58dbc087604df4d220d60 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 26 Jan 2024 00:13:21 +0100 Subject: [PATCH 0013/2895] transformers: correctly load automodels (#1643) * backends(transformers): use AutoModel with LLM types * examples: animagine-xl * Add codellama examples --- api/backend/options.go | 2 + api/config/config.go | 4 +- backend/backend.proto | 2 + backend/backend_grpc.pb.go | 457 ++++++++++++++++++ backend/python/autogptq/backend_pb2.py | 66 +-- backend/python/bark/backend_pb2.py | 66 +-- backend/python/coqui/backend_pb2.py | 66 +-- backend/python/diffusers/backend_pb2.py | 66 +-- backend/python/exllama/backend_pb2.py | 66 +-- backend/python/exllama2/backend_pb2.py | 66 +-- backend/python/mamba/backend_pb2.py | 66 +-- backend/python/petals/backend_pb2.py | 66 +-- .../sentencetransformers/backend_pb2.py | 66 +-- .../transformers-musicgen/backend_pb2.py | 66 +-- backend/python/transformers/backend_pb2.py | 66 +-- .../transformers/transformers_server.py | 22 +- backend/python/vall-e-x/backend_pb2.py | 66 +-- backend/python/vllm/backend_pb2.py | 66 +-- .../docs/getting-started/customize-model.md | 2 + .../docs/getting-started/quickstart.md | 15 +- embedded/models/animagine-xl.yaml | 17 + embedded/models/codellama-7b-gguf.yaml | 16 + embedded/models/codellama-7b.yaml | 14 + embedded/models/dolphin-2.5-mixtral-8x7b.yaml | 2 +- embedded/models/transformers-tinyllama.yaml | 32 ++ pkg/grpc/proto/backend.pb.go | 268 +++++----- pkg/grpc/proto/backend_grpc.pb.go | 2 +- 27 files changed, 1144 insertions(+), 569 deletions(-) create mode 100644 backend/backend_grpc.pb.go create mode 100644 embedded/models/animagine-xl.yaml create mode 100644 embedded/models/codellama-7b-gguf.yaml create mode 100644 embedded/models/codellama-7b.yaml create mode 100644 embedded/models/transformers-tinyllama.yaml diff --git a/api/backend/options.go b/api/backend/options.go index 3266d602..38f56068 100644 --- a/api/backend/options.go +++ b/api/backend/options.go @@ -63,6 +63,8 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions { F16Memory: c.F16, MLock: c.MMlock, RopeFreqBase: c.RopeFreqBase, + RopeScaling: c.RopeScaling, + Type: c.ModelType, RopeFreqScale: c.RopeFreqScale, NUMA: c.NUMA, Embeddings: c.Embeddings, diff --git a/api/config/config.go b/api/config/config.go index fed83d7a..1b27b574 100644 --- a/api/config/config.go +++ b/api/config/config.go @@ -128,7 +128,9 @@ type 
LLMConfig struct { Quantization string `yaml:"quantization"` MMProj string `yaml:"mmproj"` - RopeScaling string `yaml:"rope_scaling"` + RopeScaling string `yaml:"rope_scaling"` + ModelType string `yaml:"type"` + YarnExtFactor float32 `yaml:"yarn_ext_factor"` YarnAttnFactor float32 `yaml:"yarn_attn_factor"` YarnBetaFast float32 `yaml:"yarn_beta_fast"` diff --git a/backend/backend.proto b/backend/backend.proto index dff5ffe7..e9989aec 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -134,6 +134,8 @@ message ModelOptions { float YarnAttnFactor = 45; float YarnBetaFast = 46; float YarnBetaSlow = 47; + + string Type = 49; } message Result { diff --git a/backend/backend_grpc.pb.go b/backend/backend_grpc.pb.go new file mode 100644 index 00000000..5c97691d --- /dev/null +++ b/backend/backend_grpc.pb.go @@ -0,0 +1,457 @@ +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. +// versions: +// - protoc-gen-go-grpc v1.2.0 +// - protoc v4.23.4 +// source: backend/backend.proto + +package proto + +import ( + context "context" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.32.0 or later. +const _ = grpc.SupportPackageIsVersion7 + +// BackendClient is the client API for Backend service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +type BackendClient interface { + Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) + Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) + LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) + PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) + Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) + GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) + AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) + TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) + TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) + Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) +} + +type backendClient struct { + cc grpc.ClientConnInterface +} + +func NewBackendClient(cc grpc.ClientConnInterface) BackendClient { + return &backendClient{cc} +} + +func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) { + out := new(Reply) + err := c.cc.Invoke(ctx, "/backend.Backend/Health", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) { + out := new(Reply) + err := c.cc.Invoke(ctx, "/backend.Backend/Predict", in, out, opts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + +func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) { + out := new(Result) + err := c.cc.Invoke(ctx, "/backend.Backend/LoadModel", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *backendClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) { + stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], "/backend.Backend/PredictStream", opts...) + if err != nil { + return nil, err + } + x := &backendPredictStreamClient{stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil +} + +type Backend_PredictStreamClient interface { + Recv() (*Reply, error) + grpc.ClientStream +} + +type backendPredictStreamClient struct { + grpc.ClientStream +} + +func (x *backendPredictStreamClient) Recv() (*Reply, error) { + m := new(Reply) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) { + out := new(EmbeddingResult) + err := c.cc.Invoke(ctx, "/backend.Backend/Embedding", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) { + out := new(Result) + err := c.cc.Invoke(ctx, "/backend.Backend/GenerateImage", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) { + out := new(TranscriptResult) + err := c.cc.Invoke(ctx, "/backend.Backend/AudioTranscription", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) { + out := new(Result) + err := c.cc.Invoke(ctx, "/backend.Backend/TTS", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *backendClient) TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) { + out := new(TokenizationResponse) + err := c.cc.Invoke(ctx, "/backend.Backend/TokenizeString", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *backendClient) Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) { + out := new(StatusResponse) + err := c.cc.Invoke(ctx, "/backend.Backend/Status", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +// BackendServer is the server API for Backend service. 
+// All implementations must embed UnimplementedBackendServer +// for forward compatibility +type BackendServer interface { + Health(context.Context, *HealthMessage) (*Reply, error) + Predict(context.Context, *PredictOptions) (*Reply, error) + LoadModel(context.Context, *ModelOptions) (*Result, error) + PredictStream(*PredictOptions, Backend_PredictStreamServer) error + Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) + GenerateImage(context.Context, *GenerateImageRequest) (*Result, error) + AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error) + TTS(context.Context, *TTSRequest) (*Result, error) + TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error) + Status(context.Context, *HealthMessage) (*StatusResponse, error) + mustEmbedUnimplementedBackendServer() +} + +// UnimplementedBackendServer must be embedded to have forward compatible implementations. +type UnimplementedBackendServer struct { +} + +func (UnimplementedBackendServer) Health(context.Context, *HealthMessage) (*Reply, error) { + return nil, status.Errorf(codes.Unimplemented, "method Health not implemented") +} +func (UnimplementedBackendServer) Predict(context.Context, *PredictOptions) (*Reply, error) { + return nil, status.Errorf(codes.Unimplemented, "method Predict not implemented") +} +func (UnimplementedBackendServer) LoadModel(context.Context, *ModelOptions) (*Result, error) { + return nil, status.Errorf(codes.Unimplemented, "method LoadModel not implemented") +} +func (UnimplementedBackendServer) PredictStream(*PredictOptions, Backend_PredictStreamServer) error { + return status.Errorf(codes.Unimplemented, "method PredictStream not implemented") +} +func (UnimplementedBackendServer) Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) { + return nil, status.Errorf(codes.Unimplemented, "method Embedding not implemented") +} +func (UnimplementedBackendServer) GenerateImage(context.Context, *GenerateImageRequest) (*Result, error) { + return nil, status.Errorf(codes.Unimplemented, "method GenerateImage not implemented") +} +func (UnimplementedBackendServer) AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error) { + return nil, status.Errorf(codes.Unimplemented, "method AudioTranscription not implemented") +} +func (UnimplementedBackendServer) TTS(context.Context, *TTSRequest) (*Result, error) { + return nil, status.Errorf(codes.Unimplemented, "method TTS not implemented") +} +func (UnimplementedBackendServer) TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method TokenizeString not implemented") +} +func (UnimplementedBackendServer) Status(context.Context, *HealthMessage) (*StatusResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method Status not implemented") +} +func (UnimplementedBackendServer) mustEmbedUnimplementedBackendServer() {} + +// UnsafeBackendServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to BackendServer will +// result in compilation errors. 
+type UnsafeBackendServer interface { + mustEmbedUnimplementedBackendServer() +} + +func RegisterBackendServer(s grpc.ServiceRegistrar, srv BackendServer) { + s.RegisterService(&Backend_ServiceDesc, srv) +} + +func _Backend_Health_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(HealthMessage) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(BackendServer).Health(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/backend.Backend/Health", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(BackendServer).Health(ctx, req.(*HealthMessage)) + } + return interceptor(ctx, in, info, handler) +} + +func _Backend_Predict_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(PredictOptions) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(BackendServer).Predict(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/backend.Backend/Predict", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(BackendServer).Predict(ctx, req.(*PredictOptions)) + } + return interceptor(ctx, in, info, handler) +} + +func _Backend_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ModelOptions) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(BackendServer).LoadModel(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/backend.Backend/LoadModel", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(BackendServer).LoadModel(ctx, req.(*ModelOptions)) + } + return interceptor(ctx, in, info, handler) +} + +func _Backend_PredictStream_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(PredictOptions) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(BackendServer).PredictStream(m, &backendPredictStreamServer{stream}) +} + +type Backend_PredictStreamServer interface { + Send(*Reply) error + grpc.ServerStream +} + +type backendPredictStreamServer struct { + grpc.ServerStream +} + +func (x *backendPredictStreamServer) Send(m *Reply) error { + return x.ServerStream.SendMsg(m) +} + +func _Backend_Embedding_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(PredictOptions) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(BackendServer).Embedding(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/backend.Backend/Embedding", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(BackendServer).Embedding(ctx, req.(*PredictOptions)) + } + return interceptor(ctx, in, info, handler) +} + +func _Backend_GenerateImage_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GenerateImageRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(BackendServer).GenerateImage(ctx, in) + } + info := 
&grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/backend.Backend/GenerateImage", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(BackendServer).GenerateImage(ctx, req.(*GenerateImageRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Backend_AudioTranscription_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(TranscriptRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(BackendServer).AudioTranscription(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/backend.Backend/AudioTranscription", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(BackendServer).AudioTranscription(ctx, req.(*TranscriptRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Backend_TTS_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(TTSRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(BackendServer).TTS(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/backend.Backend/TTS", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(BackendServer).TTS(ctx, req.(*TTSRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Backend_TokenizeString_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(PredictOptions) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(BackendServer).TokenizeString(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/backend.Backend/TokenizeString", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(BackendServer).TokenizeString(ctx, req.(*PredictOptions)) + } + return interceptor(ctx, in, info, handler) +} + +func _Backend_Status_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(HealthMessage) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(BackendServer).Status(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/backend.Backend/Status", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(BackendServer).Status(ctx, req.(*HealthMessage)) + } + return interceptor(ctx, in, info, handler) +} + +// Backend_ServiceDesc is the grpc.ServiceDesc for Backend service. 
+// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var Backend_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "backend.Backend", + HandlerType: (*BackendServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "Health", + Handler: _Backend_Health_Handler, + }, + { + MethodName: "Predict", + Handler: _Backend_Predict_Handler, + }, + { + MethodName: "LoadModel", + Handler: _Backend_LoadModel_Handler, + }, + { + MethodName: "Embedding", + Handler: _Backend_Embedding_Handler, + }, + { + MethodName: "GenerateImage", + Handler: _Backend_GenerateImage_Handler, + }, + { + MethodName: "AudioTranscription", + Handler: _Backend_AudioTranscription_Handler, + }, + { + MethodName: "TTS", + Handler: _Backend_TTS_Handler, + }, + { + MethodName: "TokenizeString", + Handler: _Backend_TokenizeString_Handler, + }, + { + MethodName: "Status", + Handler: _Backend_Status_Handler, + }, + }, + Streams: []grpc.StreamDesc{ + { + StreamName: "PredictStream", + Handler: _Backend_PredictStream_Handler, + ServerStreams: true, + }, + }, + Metadata: "backend/backend.proto", +} diff --git a/backend/python/autogptq/backend_pb2.py b/backend/python/autogptq/backend_pb2.py index a4a46e04..be6191b1 100644 --- a/backend/python/autogptq/backend_pb2.py +++ b/backend/python/autogptq/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto -# Protobuf Python Version: 4.25.0 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xad\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b 
\x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + + 
DESCRIPTOR._options = None
+  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
   _globals['_HEALTHMESSAGE']._serialized_start=26
   _globals['_HEALTHMESSAGE']._serialized_end=41
   _globals['_PREDICTOPTIONS']._serialized_start=44
@@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False:
   _globals['_REPLY']._serialized_start=852
   _globals['_REPLY']._serialized_end=876
   _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1820
-  _globals['_RESULT']._serialized_start=1822
-  _globals['_RESULT']._serialized_end=1864
-  _globals['_EMBEDDINGRESULT']._serialized_start=1866
-  _globals['_EMBEDDINGRESULT']._serialized_end=1903
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=1905
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=1972
-  _globals['_TRANSCRIPTRESULT']._serialized_start=1974
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2052
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2054
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2143
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2146
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2361
-  _globals['_TTSREQUEST']._serialized_start=2363
-  _globals['_TTSREQUEST']._serialized_end=2417
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2419
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2473
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2476
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2618
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2570
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2618
-  _globals['_STATUSRESPONSE']._serialized_start=2621
-  _globals['_STATUSRESPONSE']._serialized_end=2794
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2727
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2794
-  _globals['_BACKEND']._serialized_start=2797
-  _globals['_BACKEND']._serialized_end=3425
+  _globals['_MODELOPTIONS']._serialized_end=1834
+  _globals['_RESULT']._serialized_start=1836
+  _globals['_RESULT']._serialized_end=1878
+  _globals['_EMBEDDINGRESULT']._serialized_start=1880
+  _globals['_EMBEDDINGRESULT']._serialized_end=1917
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=1919
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=1986
+  _globals['_TRANSCRIPTRESULT']._serialized_start=1988
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2066
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375
+  _globals['_TTSREQUEST']._serialized_start=2377
+  _globals['_TTSREQUEST']._serialized_end=2431
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2490
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2632
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632
+  _globals['_STATUSRESPONSE']._serialized_start=2635
+  _globals['_STATUSRESPONSE']._serialized_end=2808
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
+  _globals['_BACKEND']._serialized_start=2811
+  _globals['_BACKEND']._serialized_end=3439
 #
@@protoc_insertion_point(module_scope) diff --git a/backend/python/bark/backend_pb2.py b/backend/python/bark/backend_pb2.py index a4a46e04..be6191b1 100644 --- a/backend/python/bark/backend_pb2.py +++ b/backend/python/bark/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto -# Protobuf Python Version: 4.25.0 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xad\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b 
\x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + + 
DESCRIPTOR._options = None
+  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
   _globals['_HEALTHMESSAGE']._serialized_start=26
   _globals['_HEALTHMESSAGE']._serialized_end=41
   _globals['_PREDICTOPTIONS']._serialized_start=44
@@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False:
   _globals['_REPLY']._serialized_start=852
   _globals['_REPLY']._serialized_end=876
   _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1820
-  _globals['_RESULT']._serialized_start=1822
-  _globals['_RESULT']._serialized_end=1864
-  _globals['_EMBEDDINGRESULT']._serialized_start=1866
-  _globals['_EMBEDDINGRESULT']._serialized_end=1903
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=1905
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=1972
-  _globals['_TRANSCRIPTRESULT']._serialized_start=1974
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2052
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2054
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2143
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2146
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2361
-  _globals['_TTSREQUEST']._serialized_start=2363
-  _globals['_TTSREQUEST']._serialized_end=2417
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2419
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2473
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2476
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2618
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2570
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2618
-  _globals['_STATUSRESPONSE']._serialized_start=2621
-  _globals['_STATUSRESPONSE']._serialized_end=2794
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2727
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2794
-  _globals['_BACKEND']._serialized_start=2797
-  _globals['_BACKEND']._serialized_end=3425
+  _globals['_MODELOPTIONS']._serialized_end=1834
+  _globals['_RESULT']._serialized_start=1836
+  _globals['_RESULT']._serialized_end=1878
+  _globals['_EMBEDDINGRESULT']._serialized_start=1880
+  _globals['_EMBEDDINGRESULT']._serialized_end=1917
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=1919
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=1986
+  _globals['_TRANSCRIPTRESULT']._serialized_start=1988
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2066
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375
+  _globals['_TTSREQUEST']._serialized_start=2377
+  _globals['_TTSREQUEST']._serialized_end=2431
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2490
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2632
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632
+  _globals['_STATUSRESPONSE']._serialized_start=2635
+  _globals['_STATUSRESPONSE']._serialized_end=2808
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
+  _globals['_BACKEND']._serialized_start=2811
+  _globals['_BACKEND']._serialized_end=3439
 #
@@protoc_insertion_point(module_scope) diff --git a/backend/python/coqui/backend_pb2.py b/backend/python/coqui/backend_pb2.py index a4a46e04..be6191b1 100644 --- a/backend/python/coqui/backend_pb2.py +++ b/backend/python/coqui/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto -# Protobuf Python Version: 4.25.0 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xad\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b 
\x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + + 
DESCRIPTOR._options = None
+  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
   _globals['_HEALTHMESSAGE']._serialized_start=26
   _globals['_HEALTHMESSAGE']._serialized_end=41
   _globals['_PREDICTOPTIONS']._serialized_start=44
@@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False:
   _globals['_REPLY']._serialized_start=852
   _globals['_REPLY']._serialized_end=876
   _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1820
-  _globals['_RESULT']._serialized_start=1822
-  _globals['_RESULT']._serialized_end=1864
-  _globals['_EMBEDDINGRESULT']._serialized_start=1866
-  _globals['_EMBEDDINGRESULT']._serialized_end=1903
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=1905
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=1972
-  _globals['_TRANSCRIPTRESULT']._serialized_start=1974
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2052
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2054
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2143
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2146
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2361
-  _globals['_TTSREQUEST']._serialized_start=2363
-  _globals['_TTSREQUEST']._serialized_end=2417
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2419
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2473
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2476
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2618
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2570
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2618
-  _globals['_STATUSRESPONSE']._serialized_start=2621
-  _globals['_STATUSRESPONSE']._serialized_end=2794
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2727
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2794
-  _globals['_BACKEND']._serialized_start=2797
-  _globals['_BACKEND']._serialized_end=3425
+  _globals['_MODELOPTIONS']._serialized_end=1834
+  _globals['_RESULT']._serialized_start=1836
+  _globals['_RESULT']._serialized_end=1878
+  _globals['_EMBEDDINGRESULT']._serialized_start=1880
+  _globals['_EMBEDDINGRESULT']._serialized_end=1917
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=1919
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=1986
+  _globals['_TRANSCRIPTRESULT']._serialized_start=1988
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2066
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375
+  _globals['_TTSREQUEST']._serialized_start=2377
+  _globals['_TTSREQUEST']._serialized_end=2431
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2490
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2632
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632
+  _globals['_STATUSRESPONSE']._serialized_start=2635
+  _globals['_STATUSRESPONSE']._serialized_end=2808
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
+  _globals['_BACKEND']._serialized_start=2811
+  _globals['_BACKEND']._serialized_end=3439
 #
@@protoc_insertion_point(module_scope) diff --git a/backend/python/diffusers/backend_pb2.py b/backend/python/diffusers/backend_pb2.py index a4a46e04..be6191b1 100644 --- a/backend/python/diffusers/backend_pb2.py +++ b/backend/python/diffusers/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto -# Protobuf Python Version: 4.25.0 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xad\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
 _globals = globals()
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
- _globals['DESCRIPTOR']._options = None
- _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+
+ DESCRIPTOR._options = None
+ DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+ _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
+ _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
 _globals['_HEALTHMESSAGE']._serialized_start=26
 _globals['_HEALTHMESSAGE']._serialized_end=41
 _globals['_PREDICTOPTIONS']._serialized_start=44
@@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False:
 _globals['_REPLY']._serialized_start=852
 _globals['_REPLY']._serialized_end=876
 _globals['_MODELOPTIONS']._serialized_start=879
- _globals['_MODELOPTIONS']._serialized_end=1820
- _globals['_RESULT']._serialized_start=1822
- _globals['_RESULT']._serialized_end=1864
- _globals['_EMBEDDINGRESULT']._serialized_start=1866
- _globals['_EMBEDDINGRESULT']._serialized_end=1903
- _globals['_TRANSCRIPTREQUEST']._serialized_start=1905
- _globals['_TRANSCRIPTREQUEST']._serialized_end=1972
- _globals['_TRANSCRIPTRESULT']._serialized_start=1974
- _globals['_TRANSCRIPTRESULT']._serialized_end=2052
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2054
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2143
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2146
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=2361
- _globals['_TTSREQUEST']._serialized_start=2363
- _globals['_TTSREQUEST']._serialized_end=2417
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=2419
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=2473
- _globals['_MEMORYUSAGEDATA']._serialized_start=2476
- _globals['_MEMORYUSAGEDATA']._serialized_end=2618
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2570
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2618
- _globals['_STATUSRESPONSE']._serialized_start=2621
- _globals['_STATUSRESPONSE']._serialized_end=2794
- _globals['_STATUSRESPONSE_STATE']._serialized_start=2727
- _globals['_STATUSRESPONSE_STATE']._serialized_end=2794
- _globals['_BACKEND']._serialized_start=2797
- _globals['_BACKEND']._serialized_end=3425
+ _globals['_MODELOPTIONS']._serialized_end=1834
+ _globals['_RESULT']._serialized_start=1836
+ _globals['_RESULT']._serialized_end=1878
+ _globals['_EMBEDDINGRESULT']._serialized_start=1880
+ _globals['_EMBEDDINGRESULT']._serialized_end=1917
+ _globals['_TRANSCRIPTREQUEST']._serialized_start=1919
+ _globals['_TRANSCRIPTREQUEST']._serialized_end=1986
+ _globals['_TRANSCRIPTRESULT']._serialized_start=1988
+ _globals['_TRANSCRIPTRESULT']._serialized_end=2066
+ _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068
+ _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157
+ _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160
+ _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375
+ _globals['_TTSREQUEST']._serialized_start=2377
+ _globals['_TTSREQUEST']._serialized_end=2431
+ _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433
+ _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487
+ _globals['_MEMORYUSAGEDATA']._serialized_start=2490
+ _globals['_MEMORYUSAGEDATA']._serialized_end=2632
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632
+ _globals['_STATUSRESPONSE']._serialized_start=2635
+ _globals['_STATUSRESPONSE']._serialized_end=2808
+ _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
+ _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
+ _globals['_BACKEND']._serialized_start=2811
+ _globals['_BACKEND']._serialized_end=3439
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/exllama/backend_pb2.py b/backend/python/exllama/backend_pb2.py
index a4a46e04..be6191b1 100644
--- a/backend/python/exllama/backend_pb2.py
+++ b/backend/python/exllama/backend_pb2.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler. DO NOT EDIT!
 # source: backend.proto
-# Protobuf Python Version: 4.25.0
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default()
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xad\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
 _globals = globals()
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
- _globals['DESCRIPTOR']._options = None
- _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+
+ DESCRIPTOR._options = None
+ DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+ _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
+ _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
 _globals['_HEALTHMESSAGE']._serialized_start=26
 _globals['_HEALTHMESSAGE']._serialized_end=41
 _globals['_PREDICTOPTIONS']._serialized_start=44
@@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False:
 _globals['_REPLY']._serialized_start=852
 _globals['_REPLY']._serialized_end=876
 _globals['_MODELOPTIONS']._serialized_start=879
- _globals['_MODELOPTIONS']._serialized_end=1820
- _globals['_RESULT']._serialized_start=1822
- _globals['_RESULT']._serialized_end=1864
- _globals['_EMBEDDINGRESULT']._serialized_start=1866
- _globals['_EMBEDDINGRESULT']._serialized_end=1903
- _globals['_TRANSCRIPTREQUEST']._serialized_start=1905
- _globals['_TRANSCRIPTREQUEST']._serialized_end=1972
- _globals['_TRANSCRIPTRESULT']._serialized_start=1974
- _globals['_TRANSCRIPTRESULT']._serialized_end=2052
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2054
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2143
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2146
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=2361
- _globals['_TTSREQUEST']._serialized_start=2363
- _globals['_TTSREQUEST']._serialized_end=2417
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=2419
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=2473
- _globals['_MEMORYUSAGEDATA']._serialized_start=2476
- _globals['_MEMORYUSAGEDATA']._serialized_end=2618
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2570
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2618
- _globals['_STATUSRESPONSE']._serialized_start=2621
- _globals['_STATUSRESPONSE']._serialized_end=2794
- _globals['_STATUSRESPONSE_STATE']._serialized_start=2727
- _globals['_STATUSRESPONSE_STATE']._serialized_end=2794
- _globals['_BACKEND']._serialized_start=2797
- _globals['_BACKEND']._serialized_end=3425
+ _globals['_MODELOPTIONS']._serialized_end=1834
+ _globals['_RESULT']._serialized_start=1836
+ _globals['_RESULT']._serialized_end=1878
+ _globals['_EMBEDDINGRESULT']._serialized_start=1880
+ _globals['_EMBEDDINGRESULT']._serialized_end=1917
+ _globals['_TRANSCRIPTREQUEST']._serialized_start=1919
+ _globals['_TRANSCRIPTREQUEST']._serialized_end=1986
+ _globals['_TRANSCRIPTRESULT']._serialized_start=1988
+ _globals['_TRANSCRIPTRESULT']._serialized_end=2066
+ _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068
+ _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157
+ _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160
+ _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375
+ _globals['_TTSREQUEST']._serialized_start=2377
+ _globals['_TTSREQUEST']._serialized_end=2431
+ _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433
+ _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487
+ _globals['_MEMORYUSAGEDATA']._serialized_start=2490
+ _globals['_MEMORYUSAGEDATA']._serialized_end=2632
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632
+ _globals['_STATUSRESPONSE']._serialized_start=2635
+ _globals['_STATUSRESPONSE']._serialized_end=2808
+ _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
+ _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
+ _globals['_BACKEND']._serialized_start=2811
+ _globals['_BACKEND']._serialized_end=3439
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/exllama2/backend_pb2.py b/backend/python/exllama2/backend_pb2.py
index a4a46e04..be6191b1 100644
--- a/backend/python/exllama2/backend_pb2.py
+++ b/backend/python/exllama2/backend_pb2.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler. DO NOT EDIT!
 # source: backend.proto
-# Protobuf Python Version: 4.25.0
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default()
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xad\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
 _globals = globals()
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
- _globals['DESCRIPTOR']._options = None
- _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+
+ DESCRIPTOR._options = None
+ DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+ _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
+ _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
 _globals['_HEALTHMESSAGE']._serialized_start=26
 _globals['_HEALTHMESSAGE']._serialized_end=41
 _globals['_PREDICTOPTIONS']._serialized_start=44
@@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False:
 _globals['_REPLY']._serialized_start=852
 _globals['_REPLY']._serialized_end=876
 _globals['_MODELOPTIONS']._serialized_start=879
- _globals['_MODELOPTIONS']._serialized_end=1820
- _globals['_RESULT']._serialized_start=1822
- _globals['_RESULT']._serialized_end=1864
- _globals['_EMBEDDINGRESULT']._serialized_start=1866
- _globals['_EMBEDDINGRESULT']._serialized_end=1903
- _globals['_TRANSCRIPTREQUEST']._serialized_start=1905
- _globals['_TRANSCRIPTREQUEST']._serialized_end=1972
- _globals['_TRANSCRIPTRESULT']._serialized_start=1974
- _globals['_TRANSCRIPTRESULT']._serialized_end=2052
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2054
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2143
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2146
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=2361
- _globals['_TTSREQUEST']._serialized_start=2363
- _globals['_TTSREQUEST']._serialized_end=2417
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=2419
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=2473
- _globals['_MEMORYUSAGEDATA']._serialized_start=2476
- _globals['_MEMORYUSAGEDATA']._serialized_end=2618
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2570
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2618
- _globals['_STATUSRESPONSE']._serialized_start=2621
- _globals['_STATUSRESPONSE']._serialized_end=2794
- _globals['_STATUSRESPONSE_STATE']._serialized_start=2727
- _globals['_STATUSRESPONSE_STATE']._serialized_end=2794
- _globals['_BACKEND']._serialized_start=2797
- _globals['_BACKEND']._serialized_end=3425
+ _globals['_MODELOPTIONS']._serialized_end=1834
+ _globals['_RESULT']._serialized_start=1836
+ _globals['_RESULT']._serialized_end=1878
+ _globals['_EMBEDDINGRESULT']._serialized_start=1880
+ _globals['_EMBEDDINGRESULT']._serialized_end=1917
+ _globals['_TRANSCRIPTREQUEST']._serialized_start=1919
+ _globals['_TRANSCRIPTREQUEST']._serialized_end=1986
+ _globals['_TRANSCRIPTRESULT']._serialized_start=1988
+ _globals['_TRANSCRIPTRESULT']._serialized_end=2066
+ _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068
+ _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157
+ _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160
+ _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375
+ _globals['_TTSREQUEST']._serialized_start=2377
+ _globals['_TTSREQUEST']._serialized_end=2431
+ _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433
+ _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487
+ _globals['_MEMORYUSAGEDATA']._serialized_start=2490
+ _globals['_MEMORYUSAGEDATA']._serialized_end=2632
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632
+ _globals['_STATUSRESPONSE']._serialized_start=2635
+ _globals['_STATUSRESPONSE']._serialized_end=2808
+ _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
+ _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
+ _globals['_BACKEND']._serialized_start=2811
+ _globals['_BACKEND']._serialized_end=3439
 # @@protoc_insertion_point(module_scope)
@@protoc_insertion_point(module_scope) diff --git a/backend/python/mamba/backend_pb2.py b/backend/python/mamba/backend_pb2.py index a4a46e04..be6191b1 100644 --- a/backend/python/mamba/backend_pb2.py +++ b/backend/python/mamba/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto -# Protobuf Python Version: 4.25.0 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xad\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b 
\x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + + 
DESCRIPTOR._options = None
+  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
   _globals['_HEALTHMESSAGE']._serialized_start=26
   _globals['_HEALTHMESSAGE']._serialized_end=41
   _globals['_PREDICTOPTIONS']._serialized_start=44
@@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False:
   _globals['_REPLY']._serialized_start=852
   _globals['_REPLY']._serialized_end=876
   _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1820
-  _globals['_RESULT']._serialized_start=1822
-  _globals['_RESULT']._serialized_end=1864
-  _globals['_EMBEDDINGRESULT']._serialized_start=1866
-  _globals['_EMBEDDINGRESULT']._serialized_end=1903
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=1905
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=1972
-  _globals['_TRANSCRIPTRESULT']._serialized_start=1974
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2052
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2054
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2143
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2146
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2361
-  _globals['_TTSREQUEST']._serialized_start=2363
-  _globals['_TTSREQUEST']._serialized_end=2417
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2419
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2473
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2476
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2618
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2570
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2618
-  _globals['_STATUSRESPONSE']._serialized_start=2621
-  _globals['_STATUSRESPONSE']._serialized_end=2794
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2727
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2794
-  _globals['_BACKEND']._serialized_start=2797
-  _globals['_BACKEND']._serialized_end=3425
+  _globals['_MODELOPTIONS']._serialized_end=1834
+  _globals['_RESULT']._serialized_start=1836
+  _globals['_RESULT']._serialized_end=1878
+  _globals['_EMBEDDINGRESULT']._serialized_start=1880
+  _globals['_EMBEDDINGRESULT']._serialized_end=1917
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=1919
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=1986
+  _globals['_TRANSCRIPTRESULT']._serialized_start=1988
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2066
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375
+  _globals['_TTSREQUEST']._serialized_start=2377
+  _globals['_TTSREQUEST']._serialized_end=2431
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2490
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2632
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632
+  _globals['_STATUSRESPONSE']._serialized_start=2635
+  _globals['_STATUSRESPONSE']._serialized_end=2808
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
+  _globals['_BACKEND']._serialized_start=2811
+  _globals['_BACKEND']._serialized_end=3439
 #
@@protoc_insertion_point(module_scope) diff --git a/backend/python/petals/backend_pb2.py b/backend/python/petals/backend_pb2.py index a4a46e04..be6191b1 100644 --- a/backend/python/petals/backend_pb2.py +++ b/backend/python/petals/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto -# Protobuf Python Version: 4.25.0 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xad\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b 
\x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + + 
DESCRIPTOR._options = None
+  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
   _globals['_HEALTHMESSAGE']._serialized_start=26
   _globals['_HEALTHMESSAGE']._serialized_end=41
   _globals['_PREDICTOPTIONS']._serialized_start=44
@@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False:
   _globals['_REPLY']._serialized_start=852
   _globals['_REPLY']._serialized_end=876
   _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1820
-  _globals['_RESULT']._serialized_start=1822
-  _globals['_RESULT']._serialized_end=1864
-  _globals['_EMBEDDINGRESULT']._serialized_start=1866
-  _globals['_EMBEDDINGRESULT']._serialized_end=1903
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=1905
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=1972
-  _globals['_TRANSCRIPTRESULT']._serialized_start=1974
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2052
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2054
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2143
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2146
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2361
-  _globals['_TTSREQUEST']._serialized_start=2363
-  _globals['_TTSREQUEST']._serialized_end=2417
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2419
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2473
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2476
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2618
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2570
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2618
-  _globals['_STATUSRESPONSE']._serialized_start=2621
-  _globals['_STATUSRESPONSE']._serialized_end=2794
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2727
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2794
-  _globals['_BACKEND']._serialized_start=2797
-  _globals['_BACKEND']._serialized_end=3425
+  _globals['_MODELOPTIONS']._serialized_end=1834
+  _globals['_RESULT']._serialized_start=1836
+  _globals['_RESULT']._serialized_end=1878
+  _globals['_EMBEDDINGRESULT']._serialized_start=1880
+  _globals['_EMBEDDINGRESULT']._serialized_end=1917
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=1919
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=1986
+  _globals['_TRANSCRIPTRESULT']._serialized_start=1988
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2066
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375
+  _globals['_TTSREQUEST']._serialized_start=2377
+  _globals['_TTSREQUEST']._serialized_end=2431
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2490
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2632
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632
+  _globals['_STATUSRESPONSE']._serialized_start=2635
+  _globals['_STATUSRESPONSE']._serialized_end=2808
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
+  _globals['_BACKEND']._serialized_start=2811
+  _globals['_BACKEND']._serialized_end=3439
 #
@@protoc_insertion_point(module_scope) diff --git a/backend/python/sentencetransformers/backend_pb2.py b/backend/python/sentencetransformers/backend_pb2.py index a4a46e04..be6191b1 100644 --- a/backend/python/sentencetransformers/backend_pb2.py +++ b/backend/python/sentencetransformers/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto -# Protobuf Python Version: 4.25.0 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xad\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b 
\x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + + 
DESCRIPTOR._options = None
+  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
   _globals['_HEALTHMESSAGE']._serialized_start=26
   _globals['_HEALTHMESSAGE']._serialized_end=41
   _globals['_PREDICTOPTIONS']._serialized_start=44
@@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False:
   _globals['_REPLY']._serialized_start=852
   _globals['_REPLY']._serialized_end=876
   _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1820
-  _globals['_RESULT']._serialized_start=1822
-  _globals['_RESULT']._serialized_end=1864
-  _globals['_EMBEDDINGRESULT']._serialized_start=1866
-  _globals['_EMBEDDINGRESULT']._serialized_end=1903
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=1905
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=1972
-  _globals['_TRANSCRIPTRESULT']._serialized_start=1974
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2052
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2054
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2143
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2146
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2361
-  _globals['_TTSREQUEST']._serialized_start=2363
-  _globals['_TTSREQUEST']._serialized_end=2417
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2419
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2473
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2476
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2618
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2570
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2618
-  _globals['_STATUSRESPONSE']._serialized_start=2621
-  _globals['_STATUSRESPONSE']._serialized_end=2794
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2727
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2794
-  _globals['_BACKEND']._serialized_start=2797
-  _globals['_BACKEND']._serialized_end=3425
+  _globals['_MODELOPTIONS']._serialized_end=1834
+  _globals['_RESULT']._serialized_start=1836
+  _globals['_RESULT']._serialized_end=1878
+  _globals['_EMBEDDINGRESULT']._serialized_start=1880
+  _globals['_EMBEDDINGRESULT']._serialized_end=1917
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=1919
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=1986
+  _globals['_TRANSCRIPTRESULT']._serialized_start=1988
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2066
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375
+  _globals['_TTSREQUEST']._serialized_start=2377
+  _globals['_TTSREQUEST']._serialized_end=2431
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2490
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2632
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632
+  _globals['_STATUSRESPONSE']._serialized_start=2635
+  _globals['_STATUSRESPONSE']._serialized_end=2808
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
+  _globals['_BACKEND']._serialized_start=2811
+  _globals['_BACKEND']._serialized_end=3439
 #
@@protoc_insertion_point(module_scope) diff --git a/backend/python/transformers-musicgen/backend_pb2.py b/backend/python/transformers-musicgen/backend_pb2.py index a4a46e04..be6191b1 100644 --- a/backend/python/transformers-musicgen/backend_pb2.py +++ b/backend/python/transformers-musicgen/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto -# Protobuf Python Version: 4.25.0 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xad\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b 
\x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + + 
DESCRIPTOR._options = None
+  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
   _globals['_HEALTHMESSAGE']._serialized_start=26
   _globals['_HEALTHMESSAGE']._serialized_end=41
   _globals['_PREDICTOPTIONS']._serialized_start=44
@@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False:
   _globals['_REPLY']._serialized_start=852
   _globals['_REPLY']._serialized_end=876
   _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1820
-  _globals['_RESULT']._serialized_start=1822
-  _globals['_RESULT']._serialized_end=1864
-  _globals['_EMBEDDINGRESULT']._serialized_start=1866
-  _globals['_EMBEDDINGRESULT']._serialized_end=1903
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=1905
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=1972
-  _globals['_TRANSCRIPTRESULT']._serialized_start=1974
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2052
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2054
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2143
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2146
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2361
-  _globals['_TTSREQUEST']._serialized_start=2363
-  _globals['_TTSREQUEST']._serialized_end=2417
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2419
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2473
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2476
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2618
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2570
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2618
-  _globals['_STATUSRESPONSE']._serialized_start=2621
-  _globals['_STATUSRESPONSE']._serialized_end=2794
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2727
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2794
-  _globals['_BACKEND']._serialized_start=2797
-  _globals['_BACKEND']._serialized_end=3425
+  _globals['_MODELOPTIONS']._serialized_end=1834
+  _globals['_RESULT']._serialized_start=1836
+  _globals['_RESULT']._serialized_end=1878
+  _globals['_EMBEDDINGRESULT']._serialized_start=1880
+  _globals['_EMBEDDINGRESULT']._serialized_end=1917
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=1919
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=1986
+  _globals['_TRANSCRIPTRESULT']._serialized_start=1988
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2066
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375
+  _globals['_TTSREQUEST']._serialized_start=2377
+  _globals['_TTSREQUEST']._serialized_end=2431
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2490
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2632
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632
+  _globals['_STATUSRESPONSE']._serialized_start=2635
+  _globals['_STATUSRESPONSE']._serialized_end=2808
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
+  _globals['_BACKEND']._serialized_start=2811
+  _globals['_BACKEND']._serialized_end=3439
 #
@@protoc_insertion_point(module_scope) diff --git a/backend/python/transformers/backend_pb2.py b/backend/python/transformers/backend_pb2.py index a4a46e04..be6191b1 100644 --- a/backend/python/transformers/backend_pb2.py +++ b/backend/python/transformers/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto -# Protobuf Python Version: 4.25.0 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xad\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b 
\x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + + 
DESCRIPTOR._options = None
+  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
   _globals['_HEALTHMESSAGE']._serialized_start=26
   _globals['_HEALTHMESSAGE']._serialized_end=41
   _globals['_PREDICTOPTIONS']._serialized_start=44
@@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False:
   _globals['_REPLY']._serialized_start=852
   _globals['_REPLY']._serialized_end=876
   _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1820
-  _globals['_RESULT']._serialized_start=1822
-  _globals['_RESULT']._serialized_end=1864
-  _globals['_EMBEDDINGRESULT']._serialized_start=1866
-  _globals['_EMBEDDINGRESULT']._serialized_end=1903
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=1905
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=1972
-  _globals['_TRANSCRIPTRESULT']._serialized_start=1974
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2052
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2054
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2143
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2146
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2361
-  _globals['_TTSREQUEST']._serialized_start=2363
-  _globals['_TTSREQUEST']._serialized_end=2417
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2419
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2473
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2476
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2618
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2570
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2618
-  _globals['_STATUSRESPONSE']._serialized_start=2621
-  _globals['_STATUSRESPONSE']._serialized_end=2794
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2727
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2794
-  _globals['_BACKEND']._serialized_start=2797
-  _globals['_BACKEND']._serialized_end=3425
+  _globals['_MODELOPTIONS']._serialized_end=1834
+  _globals['_RESULT']._serialized_start=1836
+  _globals['_RESULT']._serialized_end=1878
+  _globals['_EMBEDDINGRESULT']._serialized_start=1880
+  _globals['_EMBEDDINGRESULT']._serialized_end=1917
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=1919
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=1986
+  _globals['_TRANSCRIPTRESULT']._serialized_start=1988
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2066
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375
+  _globals['_TTSREQUEST']._serialized_start=2377
+  _globals['_TTSREQUEST']._serialized_end=2431
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2490
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2632
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632
+  _globals['_STATUSRESPONSE']._serialized_start=2635
+  _globals['_STATUSRESPONSE']._serialized_end=2808
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
+  _globals['_BACKEND']._serialized_start=2811
+  _globals['_BACKEND']._serialized_end=3439
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py
index e6c06bab..1b177057 100755
--- a/backend/python/transformers/transformers_server.py
+++ b/backend/python/transformers/transformers_server.py
@@ -16,7 +16,7 @@ import backend_pb2_grpc
 import grpc
 import torch
 import torch.cuda
-from transformers import AutoTokenizer, AutoModel
+from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed
 
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
 
@@ -68,12 +68,19 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         """
         model_name = request.Model
         try:
-            self.model = AutoModel.from_pretrained(model_name, trust_remote_code=True)  # trust_remote_code is needed to use the encode method with embeddings models like jinai-v2
+            if request.Type == "AutoModelForCausalLM":
+                self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+            else:
+                self.model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
+
             self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+            self.CUDA = False
+
             if request.CUDA or torch.cuda.is_available():
                 try:
                     print("Loading model", model_name, "to CUDA.", file=sys.stderr)
                     self.model = self.model.to("cuda")
+                    self.CUDA = True
                 except Exception as err:
                     print("Not using CUDA:", err, file=sys.stderr)
         except Exception as err:
@@ -94,6 +101,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             An EmbeddingResult object that contains the calculated embeddings.
         """
+        set_seed(request.Seed)
         # Tokenize input
         max_length = 512
         if request.Tokens != 0:
@@ -120,6 +128,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         Returns:
             backend_pb2.Reply: The predict result.
         """
+        set_seed(request.Seed)
         if request.TopP == 0:
             request.TopP = 0.9
@@ -127,10 +136,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         if request.Tokens > 0:
             max_tokens = request.Tokens
 
-        inputs = self.tokenizer.tokenizer(request.Prompt, return_tensors="pt").input_ids
-        outputs = self.model.generate(inputs,max_tokens=max_tokens, temperature=request.Temperature, top_p=request.TopP)
+        inputs = self.tokenizer(request.Prompt, return_tensors="pt").input_ids
+        if self.CUDA:
+            inputs = inputs.to("cuda")
 
-        generated_text = self.tokenizer.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+        outputs = self.model.generate(inputs,max_new_tokens=max_tokens, temperature=request.Temperature, top_p=request.TopP)
+
+        generated_text = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
         # Remove prompt from response if present
         if request.Prompt in generated_text:
             generated_text = generated_text.replace(request.Prompt, "")
diff --git a/backend/python/vall-e-x/backend_pb2.py b/backend/python/vall-e-x/backend_pb2.py
index a4a46e04..be6191b1 100644
--- a/backend/python/vall-e-x/backend_pb2.py
+++ b/backend/python/vall-e-x/backend_pb2.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto -# Protobuf Python Version: 4.25.0 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xad\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-  _globals['DESCRIPTOR']._options = None
-  _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+
+  DESCRIPTOR._options = None
+  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
   _globals['_HEALTHMESSAGE']._serialized_start=26
   _globals['_HEALTHMESSAGE']._serialized_end=41
   _globals['_PREDICTOPTIONS']._serialized_start=44
@@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False:
   _globals['_REPLY']._serialized_start=852
   _globals['_REPLY']._serialized_end=876
   _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1820
-  _globals['_RESULT']._serialized_start=1822
-  _globals['_RESULT']._serialized_end=1864
-  _globals['_EMBEDDINGRESULT']._serialized_start=1866
-  _globals['_EMBEDDINGRESULT']._serialized_end=1903
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=1905
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=1972
-  _globals['_TRANSCRIPTRESULT']._serialized_start=1974
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2052
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2054
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2143
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2146
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2361
-  _globals['_TTSREQUEST']._serialized_start=2363
-  _globals['_TTSREQUEST']._serialized_end=2417
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2419
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2473
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2476
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2618
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2570
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2618
-  _globals['_STATUSRESPONSE']._serialized_start=2621
-  _globals['_STATUSRESPONSE']._serialized_end=2794
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2727
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2794
-  _globals['_BACKEND']._serialized_start=2797
-  _globals['_BACKEND']._serialized_end=3425
+  _globals['_MODELOPTIONS']._serialized_end=1834
+  _globals['_RESULT']._serialized_start=1836
+  _globals['_RESULT']._serialized_end=1878
+  _globals['_EMBEDDINGRESULT']._serialized_start=1880
+  _globals['_EMBEDDINGRESULT']._serialized_end=1917
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=1919
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=1986
+  _globals['_TRANSCRIPTRESULT']._serialized_start=1988
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2066
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375
+  _globals['_TTSREQUEST']._serialized_start=2377
+  _globals['_TTSREQUEST']._serialized_end=2431
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2490
+
_globals['_MEMORYUSAGEDATA']._serialized_end=2632 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632 + _globals['_STATUSRESPONSE']._serialized_start=2635 + _globals['_STATUSRESPONSE']._serialized_end=2808 + _globals['_STATUSRESPONSE_STATE']._serialized_start=2741 + _globals['_STATUSRESPONSE_STATE']._serialized_end=2808 + _globals['_BACKEND']._serialized_start=2811 + _globals['_BACKEND']._serialized_end=3439 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/vllm/backend_pb2.py b/backend/python/vllm/backend_pb2.py index a4a46e04..be6191b1 100644 --- a/backend/python/vllm/backend_pb2.py +++ b/backend/python/vllm/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto -# Protobuf Python Version: 4.25.0 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xad\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b 
\x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + + 
DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None + _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_REPLY']._serialized_start=852 _globals['_REPLY']._serialized_end=876 _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1820 - _globals['_RESULT']._serialized_start=1822 - _globals['_RESULT']._serialized_end=1864 - _globals['_EMBEDDINGRESULT']._serialized_start=1866 - _globals['_EMBEDDINGRESULT']._serialized_end=1903 - _globals['_TRANSCRIPTREQUEST']._serialized_start=1905 - _globals['_TRANSCRIPTREQUEST']._serialized_end=1972 - _globals['_TRANSCRIPTRESULT']._serialized_start=1974 - _globals['_TRANSCRIPTRESULT']._serialized_end=2052 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2054 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2143 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2146 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2361 - _globals['_TTSREQUEST']._serialized_start=2363 - _globals['_TTSREQUEST']._serialized_end=2417 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2419 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2473 - _globals['_MEMORYUSAGEDATA']._serialized_start=2476 - _globals['_MEMORYUSAGEDATA']._serialized_end=2618 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2570 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2618 - _globals['_STATUSRESPONSE']._serialized_start=2621 - _globals['_STATUSRESPONSE']._serialized_end=2794 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2727 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2794 - _globals['_BACKEND']._serialized_start=2797 - _globals['_BACKEND']._serialized_end=3425 + _globals['_MODELOPTIONS']._serialized_end=1834 + _globals['_RESULT']._serialized_start=1836 + _globals['_RESULT']._serialized_end=1878 + _globals['_EMBEDDINGRESULT']._serialized_start=1880 + _globals['_EMBEDDINGRESULT']._serialized_end=1917 + _globals['_TRANSCRIPTREQUEST']._serialized_start=1919 + _globals['_TRANSCRIPTREQUEST']._serialized_end=1986 + _globals['_TRANSCRIPTRESULT']._serialized_start=1988 + _globals['_TRANSCRIPTRESULT']._serialized_end=2066 + _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068 + _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157 + _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160 + _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375 + _globals['_TTSREQUEST']._serialized_start=2377 + _globals['_TTSREQUEST']._serialized_end=2431 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487 + _globals['_MEMORYUSAGEDATA']._serialized_start=2490 + _globals['_MEMORYUSAGEDATA']._serialized_end=2632 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632 + _globals['_STATUSRESPONSE']._serialized_start=2635 + _globals['_STATUSRESPONSE']._serialized_end=2808 + _globals['_STATUSRESPONSE_STATE']._serialized_start=2741 + _globals['_STATUSRESPONSE_STATE']._serialized_end=2808 + _globals['_BACKEND']._serialized_start=2811 + _globals['_BACKEND']._serialized_end=3439 # 
@@protoc_insertion_point(module_scope)
diff --git a/docs/content/docs/getting-started/customize-model.md b/docs/content/docs/getting-started/customize-model.md
index 0984d6ce..0d1cf21a 100644
--- a/docs/content/docs/getting-started/customize-model.md
+++ b/docs/content/docs/getting-started/customize-model.md
@@ -26,6 +26,8 @@ Here's an example to initiate the **phi-2** model:
 docker run -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core https://gist.githubusercontent.com/mudler/ad601a0488b497b69ec549150d9edd18/raw/a8a8869ef1bb7e3830bf5c0bae29a0cce991ff8d/phi-2.yaml
 ```
+You can also check all the embedded model configurations [here](https://github.com/mudler/LocalAI/tree/master/embedded/models).
+
 {{% alert icon="" %}}
 The model configurations used in the quickstart are accessible here: [https://github.com/mudler/LocalAI/tree/master/embedded/models](https://github.com/mudler/LocalAI/tree/master/embedded/models).
 Contributions are welcome; please feel free to submit a Pull Request.
diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md
index 89833d82..23d7d70f 100644
--- a/docs/content/docs/getting-started/quickstart.md
+++ b/docs/content/docs/getting-started/quickstart.md
@@ -40,7 +40,7 @@ There are different categories of models: [LLMs]({{%relref "docs/features/text-g
 {{% alert icon="💡" %}}
-To customize the models, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}). For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI/tree/master/examples/configurations).
+To customize the models, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}). For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI/tree/master/examples/configurations); the configurations for the models below are available [here](https://github.com/mudler/LocalAI/tree/master/embedded/models).
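All of the containers in the tables below expose the same OpenAI-compatible API, so any of them can be smoke-tested with a minimal client. A sketch in Go, assuming the default port 8080 and the tinyllama-chat model name from the tables:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Minimal chat-completions request against a locally running LocalAI container.
	// "tinyllama-chat" is taken from the model tables below; any other LLM entry works too.
	payload := []byte(`{"model": "tinyllama-chat", "messages": [{"role": "user", "content": "Say this is a test!"}]}`)
	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body))
}
```

The same request body works for every LLM entry in the tables; only the "model" field changes.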
{{% /alert %}} {{< tabs tabTotal="3" >}} @@ -64,6 +64,10 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start | [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core tinyllama-chat``` | | [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core dolphin-2.5-mixtral-8x7b``` | | 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only | +| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | GPU-only | +| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only | +| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) (with transformers) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only | +| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) (with llama.cpp) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core codellama-7b-gguf``` | {{% /tab %}} {{% tab tabName="GPU (CUDA 11)" %}} @@ -86,7 +90,10 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start | [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core tinyllama-chat``` | | [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core dolphin-2.5-mixtral-8x7b``` | | 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 mamba-chat``` | - +| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda11 animagine-xl``` | +| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 transformers-tinyllama``` | +| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 codellama-7b``` | +| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core codellama-7b-gguf``` | {{% /tab %}} @@ -110,6 +117,10 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start | [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref 
"docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core tinyllama-chat``` | | [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core dolphin-2.5-mixtral-8x7b``` | | 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 mamba-chat``` | +| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | GPU-only | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda12 animagine-xl``` | +| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 transformers-tinyllama``` | +| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 codellama-7b``` | +| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core codellama-7b-gguf``` | {{% /tab %}} {{< /tabs >}} diff --git a/embedded/models/animagine-xl.yaml b/embedded/models/animagine-xl.yaml new file mode 100644 index 00000000..d492c080 --- /dev/null +++ b/embedded/models/animagine-xl.yaml @@ -0,0 +1,17 @@ +name: animagine-xl +parameters: + model: Linaqruf/animagine-xl +backend: diffusers +f16: true +diffusers: + scheduler_type: euler_a + +usage: | + curl http://localhost:8080/v1/images/generations \ + -H "Content-Type: application/json" \ + -d '{ + "prompt": "|", + "model": "animagine-xl", + "step": 51, + "size": "1024x1024" + }' \ No newline at end of file diff --git a/embedded/models/codellama-7b-gguf.yaml b/embedded/models/codellama-7b-gguf.yaml new file mode 100644 index 00000000..d2a6b518 --- /dev/null +++ b/embedded/models/codellama-7b-gguf.yaml @@ -0,0 +1,16 @@ +name: codellama-7b-gguf +backend: transformers +parameters: + model: huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_K_M.gguf + temperature: 0.2 + top_k: 40 + seed: -1 + top_p: 0.95 +context_size: 4096 +f16: true +gpu_layers: 90 +usage: | + curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{ + "model": "codellama-7b-gguf", + "prompt": "import socket\n\ndef ping_exponential_backoff(host: str):" + }' \ No newline at end of file diff --git a/embedded/models/codellama-7b.yaml b/embedded/models/codellama-7b.yaml new file mode 100644 index 00000000..2dae7a8f --- /dev/null +++ b/embedded/models/codellama-7b.yaml @@ -0,0 +1,14 @@ +name: codellama-7b +backend: transformers +parameters: + model: codellama/CodeLlama-7b-hf + temperature: 0.2 + top_k: 40 + seed: -1 + top_p: 0.95 + +usage: | + curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{ + "model": "codellama-7b", + "prompt": "import socket\n\ndef ping_exponential_backoff(host: str):" + }' \ No newline at end of file diff --git a/embedded/models/dolphin-2.5-mixtral-8x7b.yaml b/embedded/models/dolphin-2.5-mixtral-8x7b.yaml index b6df4799..ba6020c0 100644 --- 
a/embedded/models/dolphin-2.5-mixtral-8x7b.yaml +++ b/embedded/models/dolphin-2.5-mixtral-8x7b.yaml @@ -1,7 +1,7 @@ name: dolphin-mixtral-8x7b mmap: true parameters: - model: huggingface://TheBloke/dolphin-2.5-mixtral-8x7b-GGUF/blob/main/dolphin-2.5-mixtral-8x7b.Q2_K.gguf + model: huggingface://TheBloke/dolphin-2.5-mixtral-8x7b-GGUF/dolphin-2.5-mixtral-8x7b.Q2_K.gguf temperature: 0.2 top_k: 40 top_p: 0.95 diff --git a/embedded/models/transformers-tinyllama.yaml b/embedded/models/transformers-tinyllama.yaml new file mode 100644 index 00000000..0fcb64b6 --- /dev/null +++ b/embedded/models/transformers-tinyllama.yaml @@ -0,0 +1,32 @@ +name: tinyllama-chat +backend: transformers +type: AutoModelForCausalLM + +parameters: + model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 + temperature: 0.2 + top_k: 40 + seed: -1 + top_p: 0.95 + max_tokens: 4096 + +template: + chat_message: | + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} + {{if .Content}}{{.Content}}{{end}}<|im_end|> + chat: | + {{.Input}} + <|im_start|>assistant + + completion: | + {{.Input}} + +stopwords: +- <|im_end|> + +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "tinyllama-chat", + "messages": [{"role": "user", "content": "Say this is a test!"}], + "temperature": 0.7 + }' \ No newline at end of file diff --git a/pkg/grpc/proto/backend.pb.go b/pkg/grpc/proto/backend.pb.go index b9569785..acf9f375 100644 --- a/pkg/grpc/proto/backend.pb.go +++ b/pkg/grpc/proto/backend.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.28.1 -// protoc v3.6.1 +// protoc-gen-go v1.26.0 +// protoc v4.23.4 // source: backend.proto package proto @@ -567,8 +567,7 @@ type ModelOptions struct { CLIPSubfolder string `protobuf:"bytes,32,opt,name=CLIPSubfolder,proto3" json:"CLIPSubfolder,omitempty"` CLIPSkip int32 `protobuf:"varint,33,opt,name=CLIPSkip,proto3" json:"CLIPSkip,omitempty"` ControlNet string `protobuf:"bytes,48,opt,name=ControlNet,proto3" json:"ControlNet,omitempty"` - // RWKV - Tokenizer string `protobuf:"bytes,34,opt,name=Tokenizer,proto3" json:"Tokenizer,omitempty"` + Tokenizer string `protobuf:"bytes,34,opt,name=Tokenizer,proto3" json:"Tokenizer,omitempty"` // LLM (llama.cpp) LoraBase string `protobuf:"bytes,35,opt,name=LoraBase,proto3" json:"LoraBase,omitempty"` LoraAdapter string `protobuf:"bytes,36,opt,name=LoraAdapter,proto3" json:"LoraAdapter,omitempty"` @@ -584,6 +583,7 @@ type ModelOptions struct { YarnAttnFactor float32 `protobuf:"fixed32,45,opt,name=YarnAttnFactor,proto3" json:"YarnAttnFactor,omitempty"` YarnBetaFast float32 `protobuf:"fixed32,46,opt,name=YarnBetaFast,proto3" json:"YarnBetaFast,omitempty"` YarnBetaSlow float32 `protobuf:"fixed32,47,opt,name=YarnBetaSlow,proto3" json:"YarnBetaSlow,omitempty"` + Type string `protobuf:"bytes,49,opt,name=Type,proto3" json:"Type,omitempty"` } func (x *ModelOptions) Reset() { @@ -954,6 +954,13 @@ func (x *ModelOptions) GetYarnBetaSlow() float32 { return 0 } +func (x *ModelOptions) GetType() string { + if x != nil { + return x.Type + } + return "" +} + type Result struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1696,7 +1703,7 @@ var file_backend_proto_rawDesc = []byte{ 0x65, 0x73, 0x18, 0x2a, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 
0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, - 0x61, 0x67, 0x65, 0x22, 0xcc, 0x0b, 0x0a, 0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, + 0x61, 0x67, 0x65, 0x22, 0xe0, 0x0b, 0x0a, 0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, @@ -1789,131 +1796,132 @@ var file_backend_proto_rawDesc = []byte{ 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x46, 0x61, 0x73, 0x74, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x18, 0x2f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, - 0x6f, 0x77, 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, - 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, - 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, - 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, - 0x22, 0x31, 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, - 0x75, 0x6c, 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, - 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, - 0x6e, 0x67, 0x73, 0x22, 0x5b, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, - 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x6c, 0x61, - 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c, 0x61, - 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, - 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, - 0x22, 0x5e, 0x0a, 0x10, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, - 0x73, 0x75, 0x6c, 0x74, 0x12, 0x36, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, - 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, - 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, - 0x6e, 0x74, 0x52, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, - 0x74, 0x65, 0x78, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, - 0x22, 0x77, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, - 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x05, 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x03, 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, - 0x6e, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x12, 0x12, 0x0a, - 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, - 0x74, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, - 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xbe, 0x02, 0x0a, 0x14, 
0x47, 0x65, - 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, - 0x73, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x77, 0x69, - 0x64, 0x74, 0x68, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, - 0x12, 0x12, 0x0a, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, - 0x6d, 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x74, 0x65, 0x70, 0x18, 0x04, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x04, 0x73, 0x74, 0x65, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x65, 0x65, 0x64, - 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x65, 0x65, 0x64, 0x12, 0x27, 0x0a, 0x0f, - 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, - 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x50, - 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, - 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, - 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x10, - 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, - 0x12, 0x10, 0x0a, 0x03, 0x73, 0x72, 0x63, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, - 0x72, 0x63, 0x12, 0x2a, 0x0a, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, - 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x10, 0x45, 0x6e, - 0x61, 0x62, 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x12, 0x1a, - 0x0a, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x05, - 0x52, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x22, 0x48, 0x0a, 0x0a, 0x54, 0x54, - 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x14, 0x0a, 0x05, - 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, - 0x65, 0x6c, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x03, 0x64, 0x73, 0x74, 0x22, 0x46, 0x0a, 0x14, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61, - 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, 0x0a, 0x06, - 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x6c, 0x65, - 0x6e, 0x67, 0x74, 0x68, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x02, - 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xac, 0x01, 0x0a, - 0x0f, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, - 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, - 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x45, 0x0a, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, - 0x6f, 0x77, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x62, 0x61, 0x63, 0x6b, - 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, - 0x61, 0x74, 0x61, 0x2e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e, 0x74, - 0x72, 0x79, 0x52, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x1a, 0x3c, 0x0a, - 0x0e, 0x42, 
0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, - 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, - 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, - 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xbc, 0x01, 0x0a, 0x0e, - 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x33, - 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1d, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, - 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, - 0x61, 0x74, 0x65, 0x12, 0x30, 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, - 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x52, 0x06, 0x6d, - 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x22, 0x43, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x11, - 0x0a, 0x0d, 0x55, 0x4e, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, 0x5a, 0x45, 0x44, 0x10, - 0x00, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x01, 0x12, 0x09, 0x0a, 0x05, 0x52, - 0x45, 0x41, 0x44, 0x59, 0x10, 0x02, 0x12, 0x12, 0x0a, 0x05, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x32, 0xf4, 0x04, 0x0a, 0x07, 0x42, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, - 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, - 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x07, 0x50, 0x72, - 0x65, 0x64, 0x69, 0x63, 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, - 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, - 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, - 0x12, 0x35, 0x0a, 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x15, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, - 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, 0x72, 0x65, 0x64, 0x69, - 0x63, 0x74, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, - 0x79, 0x22, 0x00, 0x30, 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, - 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, - 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, - 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, - 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, 0x65, 0x6e, 0x65, 0x72, - 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x2e, 0x47, 0x65, 0x6e, 
0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, - 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, - 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, 0x0a, 0x12, 0x41, 0x75, - 0x64, 0x69, 0x6f, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, - 0x12, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, - 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, - 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x03, 0x54, 0x54, 0x53, - 0x12, 0x13, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x54, 0x53, 0x52, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, - 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4a, 0x0a, 0x0e, 0x54, 0x6f, 0x6b, 0x65, - 0x6e, 0x69, 0x7a, 0x65, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, - 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, - 0x6f, 0x6e, 0x73, 0x1a, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x6f, - 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, - 0x73, 0x65, 0x22, 0x00, 0x12, 0x3b, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, - 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, - 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, - 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, - 0x00, 0x42, 0x5a, 0x0a, 0x19, 0x69, 0x6f, 0x2e, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2e, 0x6c, - 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x42, 0x0e, - 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x50, 0x01, - 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x2d, - 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x2f, 0x70, - 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x6f, 0x77, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, 0x18, 0x31, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x04, 0x54, 0x79, 0x70, 0x65, 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, + 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, + 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, + 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, + 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, + 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d, 0x62, + 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x5b, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, + 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, + 0x64, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x1a, + 0x0a, 0x08, 0x6c, 0x61, 0x6e, 
0x67, 0x75, 0x61, 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x68, + 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x68, 0x72, + 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e, 0x0a, 0x10, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, + 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x36, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, + 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x62, 0x61, 0x63, + 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, + 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, + 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, + 0x74, 0x65, 0x78, 0x74, 0x22, 0x77, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, + 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, + 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, + 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x65, 0x6e, + 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, + 0x05, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xbe, 0x02, + 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x12, 0x14, + 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x77, + 0x69, 0x64, 0x74, 0x68, 0x12, 0x12, 0x0a, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x74, 0x65, 0x70, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x74, 0x65, 0x70, 0x12, 0x12, 0x0a, 0x04, + 0x73, 0x65, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x65, 0x65, 0x64, + 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, + 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x73, 0x69, 0x74, + 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x6e, 0x65, 0x67, + 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x07, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x0e, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, + 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x64, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x72, 0x63, 0x18, 0x09, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x73, 0x72, 0x63, 0x12, 0x2a, 0x0a, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, + 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, + 0x72, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x0b, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, 0x49, 
0x50, 0x53, 0x6b, 0x69, 0x70, 0x22, 0x48, + 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, + 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, + 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x22, 0x46, 0x0a, 0x14, 0x54, 0x6f, 0x6b, 0x65, + 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, + 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, + 0x22, 0xac, 0x01, 0x0a, 0x0f, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, + 0x44, 0x61, 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x45, 0x0a, 0x09, 0x62, 0x72, + 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, + 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, + 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x2e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, + 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, + 0x6e, 0x1a, 0x3c, 0x0a, 0x0e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e, + 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, + 0xbc, 0x01, 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0e, 0x32, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, + 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x30, 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, + 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, + 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, + 0x61, 0x52, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x22, 0x43, 0x0a, 0x05, 0x53, 0x74, 0x61, + 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, + 0x5a, 0x45, 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x01, 0x12, + 0x09, 0x0a, 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, 0x10, 0x02, 0x12, 0x12, 0x0a, 0x05, 0x45, 0x52, + 0x52, 0x4f, 0x52, 0x10, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x32, 0xf4, + 0x04, 0x0a, 0x07, 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, + 0x61, 0x6c, 0x74, 0x68, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, + 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0e, 0x2e, 0x62, + 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 
0x00, 0x12, 0x34, + 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, + 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, + 0x6c, 0x79, 0x22, 0x00, 0x12, 0x35, 0x0a, 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, + 0x6c, 0x12, 0x15, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, + 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, + 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, + 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, + 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, + 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x30, 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, + 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, + 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, + 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, + 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, + 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, + 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, + 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, + 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, + 0x0a, 0x12, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, + 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, + 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, + 0x03, 0x54, 0x54, 0x53, 0x12, 0x13, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, + 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, + 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4a, 0x0a, 0x0e, + 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x17, + 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, + 0x64, 0x2e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, + 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x3b, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, + 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x17, 0x2e, 0x62, 0x61, 0x63, + 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x22, 0x00, 0x42, 0x5a, 0x0a, 0x19, 0x69, 0x6f, 0x2e, 0x73, 0x6b, 0x79, 0x6e, + 0x65, 
0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, + 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x42, 0x61, 0x63, 0x6b, 0x65, + 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, + 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, 0x61, 0x6c, + 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/pkg/grpc/proto/backend_grpc.pb.go b/pkg/grpc/proto/backend_grpc.pb.go index d41f77a6..ef5187bc 100644 --- a/pkg/grpc/proto/backend_grpc.pb.go +++ b/pkg/grpc/proto/backend_grpc.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: // - protoc-gen-go-grpc v1.2.0 -// - protoc v3.6.1 +// - protoc v4.23.4 // source: backend.proto package proto From ac19998e5ecbafe924edecdad61279a9e0dff5f0 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 26 Jan 2024 00:13:39 +0100 Subject: [PATCH 0014/2895] :arrow_up: Update ggerganov/llama.cpp (#1644) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f6d7f0f5..0e01b00b 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=c9b316c78fba31e65879a2ec91cbafd341b88cce +CPPLLAMA_VERSION?=d292f4f2047963f558dd516f1baaa71793e9acf2 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From f1846ae5acb6682cf35098fe4ece1d569cb6650e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 26 Jan 2024 16:22:54 +0100 Subject: [PATCH 0015/2895] Update phi-2.yaml Signed-off-by: Ettore Di Giacinto --- examples/configurations/phi-2.yaml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/examples/configurations/phi-2.yaml b/examples/configurations/phi-2.yaml index c09aa6ce..8f193866 100644 --- a/examples/configurations/phi-2.yaml +++ b/examples/configurations/phi-2.yaml @@ -15,4 +15,11 @@ template: chat: &template | Instruct: {{.Input}} Output: - completion: *template \ No newline at end of file + completion: *template + +usage: | + To use this model, interact with the API (in another terminal) with curl for instance: + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "phi-2", + "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] + }' From 9f1be45552813950529b6d3dfd2fe7b46b861962 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 26 Jan 2024 17:55:20 +0100 Subject: [PATCH 0016/2895] Update quickstart.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/getting-started/quickstart.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md index 23d7d70f..e786d6d8 100644 --- a/docs/content/docs/getting-started/quickstart.md +++ b/docs/content/docs/getting-started/quickstart.md @@ -117,8 +117,8 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start | [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref 
"docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core tinyllama-chat``` | | [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core dolphin-2.5-mixtral-8x7b``` | | 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 mamba-chat``` | -| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | GPU-only | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda12 animagine-xl``` | -| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 transformers-tinyllama``` | +| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda12 animagine-xl``` | +| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 transformers-tinyllama``` | | [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 codellama-7b``` | | [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core codellama-7b-gguf``` | {{% /tab %}} From 670cee8274fe15c29e1f9006743e055e62ae831b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 26 Jan 2024 18:29:38 +0100 Subject: [PATCH 0017/2895] Update transformers-tinyllama.yaml Signed-off-by: Ettore Di Giacinto --- embedded/models/transformers-tinyllama.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/embedded/models/transformers-tinyllama.yaml b/embedded/models/transformers-tinyllama.yaml index 0fcb64b6..ee6e7889 100644 --- a/embedded/models/transformers-tinyllama.yaml +++ b/embedded/models/transformers-tinyllama.yaml @@ -6,7 +6,6 @@ parameters: model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 temperature: 0.2 top_k: 40 - seed: -1 top_p: 0.95 max_tokens: 4096 @@ -29,4 +28,4 @@ usage: | "model": "tinyllama-chat", "messages": [{"role": "user", "content": "Say this is a test!"}], "temperature": 0.7 - }' \ No newline at end of file + }' From 072f71dfb78532e7ead9ec583ca888b8bff6aced Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 26 Jan 2024 18:35:33 +0100 Subject: [PATCH 0018/2895] Update codellama-7b.yaml Signed-off-by: Ettore Di Giacinto --- embedded/models/codellama-7b.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/embedded/models/codellama-7b.yaml b/embedded/models/codellama-7b.yaml index 2dae7a8f..151f7515 100644 --- a/embedded/models/codellama-7b.yaml +++ b/embedded/models/codellama-7b.yaml @@ -4,11 +4,10 @@ parameters: model: codellama/CodeLlama-7b-hf temperature: 0.2 top_k: 40 - seed: -1 top_p: 0.95 usage: | curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{ "model": 
"codellama-7b", "prompt": "import socket\n\ndef ping_exponential_backoff(host: str):" - }' \ No newline at end of file + }' From 5a6fd98839029dddabb0f778f4e6833f008dc636 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Jan 2024 00:13:19 +0100 Subject: [PATCH 0019/2895] fix(paths): automatically create paths (#1650) Especially useful when running inside a container. Signed-off-by: Ettore Di Giacinto --- api/api.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/api/api.go b/api/api.go index fed962db..82e0f69b 100644 --- a/api/api.go +++ b/api/api.go @@ -216,6 +216,11 @@ func App(opts ...options.AppOption) (*fiber.App, error) { }{Version: internal.PrintableVersion()}) }) + // Make sure directories exists + os.MkdirAll(options.ImageDir, 0755) + os.MkdirAll(options.AudioDir, 0755) + os.MkdirAll(options.Loader.ModelPath, 0755) + modelGalleryService := localai.CreateModelGalleryService(options.Galleries, options.Loader.ModelPath, galleryService) app.Post("/models/apply", auth, modelGalleryService.ApplyModelGalleryEndpoint()) app.Get("/models/available", auth, modelGalleryService.ListModelFromGalleryEndpoint()) From f928899338b81373b0654b175ce3591651318c98 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 27 Jan 2024 00:13:38 +0100 Subject: [PATCH 0020/2895] :arrow_up: Update ggerganov/llama.cpp (#1652) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0e01b00b..399e8d93 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=d292f4f2047963f558dd516f1baaa71793e9acf2 +CPPLLAMA_VERSION?=62fead3ea0a30c8d424f4a8373fa14165c7c707f # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 6ac5d814fbb5faa26ed041ab5f7864441f431eef Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Jan 2024 00:14:16 +0100 Subject: [PATCH 0021/2895] feat(startup): fetch model definition remotely (#1654) --- api/api.go | 2 +- api/options/options.go | 8 ++++++++ embedded/embedded.go | 15 +++++++++++++++ main.go | 11 +++++++++++ pkg/startup/model_preload.go | 16 ++++++++++++++-- pkg/startup/model_preload_test.go | 22 +++++++++++++++++++--- 6 files changed, 68 insertions(+), 6 deletions(-) diff --git a/api/api.go b/api/api.go index 82e0f69b..7ec95f1b 100644 --- a/api/api.go +++ b/api/api.go @@ -37,7 +37,7 @@ func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader, log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.Loader.ModelPath) log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion()) - startup.PreloadModelsConfigurations(options.Loader.ModelPath, options.ModelsURL...) + startup.PreloadModelsConfigurations(options.ModelLibraryURL, options.Loader.ModelPath, options.ModelsURL...) 
cl := config.NewConfigLoader() if err := cl.LoadConfigs(options.Loader.ModelPath); err != nil { diff --git a/api/options/options.go b/api/options/options.go index e83eaaad..8c066584 100644 --- a/api/options/options.go +++ b/api/options/options.go @@ -28,6 +28,8 @@ type Option struct { ApiKeys []string Metrics *metrics.Metrics + ModelLibraryURL string + Galleries []gallery.Gallery BackendAssets embed.FS @@ -78,6 +80,12 @@ func WithCors(b bool) AppOption { } } +func WithModelLibraryURL(url string) AppOption { + return func(o *Option) { + o.ModelLibraryURL = url + } +} + var EnableWatchDog = func(o *Option) { o.WatchDog = true } diff --git a/embedded/embedded.go b/embedded/embedded.go index a76e87cd..c779fc26 100644 --- a/embedded/embedded.go +++ b/embedded/embedded.go @@ -6,6 +6,8 @@ import ( "slices" "strings" + "github.com/go-skynet/LocalAI/pkg/downloader" + "github.com/go-skynet/LocalAI/pkg/assets" "gopkg.in/yaml.v3" ) @@ -30,6 +32,19 @@ func init() { yaml.Unmarshal(modelLibrary, &modelShorteners) } +func GetRemoteLibraryShorteners(url string) (map[string]string, error) { + remoteLibrary := map[string]string{} + + err := downloader.GetURI(url, func(_ string, i []byte) error { + return yaml.Unmarshal(i, &remoteLibrary) + }) + if err != nil { + return nil, fmt.Errorf("error downloading remote library: %s", err.Error()) + } + + return remoteLibrary, err +} + // ExistsInModelsLibrary checks if a model exists in the embedded models library func ExistsInModelsLibrary(s string) bool { f := fmt.Sprintf("%s.yaml", s) diff --git a/main.go b/main.go index 39e38686..d2209285 100644 --- a/main.go +++ b/main.go @@ -26,6 +26,10 @@ import ( "github.com/urfave/cli/v2" ) +const ( + remoteLibraryURL = "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml" +) + func main() { log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr}) // clean up process @@ -94,6 +98,12 @@ func main() { Usage: "JSON list of galleries", EnvVars: []string{"GALLERIES"}, }, + &cli.StringFlag{ + Name: "remote-library", + Usage: "A LocalAI remote library URL", + EnvVars: []string{"REMOTE_LIBRARY"}, + Value: remoteLibraryURL, + }, &cli.StringFlag{ Name: "preload-models", Usage: "A List of models to apply in JSON at start", @@ -219,6 +229,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit options.WithAudioDir(ctx.String("audio-path")), options.WithF16(ctx.Bool("f16")), options.WithStringGalleries(ctx.String("galleries")), + options.WithModelLibraryURL(ctx.String("remote-library")), options.WithDisableMessage(false), options.WithCors(ctx.Bool("cors")), options.WithCorsAllowOrigins(ctx.String("cors-allow-origins")), diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go index c23b7b41..cc514334 100644 --- a/pkg/startup/model_preload.go +++ b/pkg/startup/model_preload.go @@ -14,10 +14,22 @@ import ( // PreloadModelsConfigurations will preload models from the given list of URLs // It will download the model if it is not already present in the model path // It will also try to resolve if the model is an embedded model YAML configuration -func PreloadModelsConfigurations(modelPath string, models ...string) { +func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, models ...string) { for _, url := range models { - url = embedded.ModelShortURL(url) + // As a best effort, try to resolve the model from the remote library + // if it's not resolved we try with the other method below + if modelLibraryURL != "" { + lib, err := 
embedded.GetRemoteLibraryShorteners(modelLibraryURL) + if err == nil { + if lib[url] != "" { + log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url]) + url = lib[url] + } + } + } + + url = embedded.ModelShortURL(url) switch { case embedded.ExistsInModelsLibrary(url): modelYAML, err := embedded.ResolveContent(url) diff --git a/pkg/startup/model_preload_test.go b/pkg/startup/model_preload_test.go index d1e0eab3..63a8f8b0 100644 --- a/pkg/startup/model_preload_test.go +++ b/pkg/startup/model_preload_test.go @@ -15,13 +15,29 @@ import ( var _ = Describe("Preload test", func() { Context("Preloading from strings", func() { + It("loads from remote url", func() { + tmpdir, err := os.MkdirTemp("", "") + Expect(err).ToNot(HaveOccurred()) + libraryURL := "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml" + fileName := fmt.Sprintf("%s.yaml", "1701d57f28d47552516c2b6ecc3cc719") + + PreloadModelsConfigurations(libraryURL, tmpdir, "phi-2") + + resultFile := filepath.Join(tmpdir, fileName) + + content, err := os.ReadFile(resultFile) + Expect(err).ToNot(HaveOccurred()) + + Expect(string(content)).To(ContainSubstring("name: phi-2")) + }) + It("loads from embedded full-urls", func() { tmpdir, err := os.MkdirTemp("", "") Expect(err).ToNot(HaveOccurred()) url := "https://raw.githubusercontent.com/mudler/LocalAI/master/examples/configurations/phi-2.yaml" fileName := fmt.Sprintf("%s.yaml", utils.MD5(url)) - PreloadModelsConfigurations(tmpdir, url) + PreloadModelsConfigurations("", tmpdir, url) resultFile := filepath.Join(tmpdir, fileName) @@ -35,7 +51,7 @@ var _ = Describe("Preload test", func() { Expect(err).ToNot(HaveOccurred()) url := "phi-2" - PreloadModelsConfigurations(tmpdir, url) + PreloadModelsConfigurations("", tmpdir, url) entry, err := os.ReadDir(tmpdir) Expect(err).ToNot(HaveOccurred()) @@ -53,7 +69,7 @@ var _ = Describe("Preload test", func() { url := "mistral-openorca" fileName := fmt.Sprintf("%s.yaml", utils.MD5(url)) - PreloadModelsConfigurations(tmpdir, url) + PreloadModelsConfigurations("", tmpdir, url) resultFile := filepath.Join(tmpdir, fileName) From abd678e147e29ab46a038d28c4806768b2b7cde5 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 28 Jan 2024 09:24:44 +0100 Subject: [PATCH 0022/2895] :arrow_up: Update ggerganov/llama.cpp (#1655) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 399e8d93..14c7cdae 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=62fead3ea0a30c8d424f4a8373fa14165c7c707f +CPPLLAMA_VERSION?=6db2b41a76ee78d5efdd5c3cddd5d7ad3f646855 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From f2ed3df3da224094e78a9e3ed92bcbd1597884fd Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 30 Jan 2024 00:43:18 +0100 Subject: [PATCH 0023/2895] :arrow_up: Update docs version mudler/LocalAI (#1661) Signed-off-by: GitHub Co-authored-by: mudler --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index 8f66537b..dad9e122 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.6.1" + "version": "v2.7.0" } From 
c1bae1ee81815aeae5f6703b78d42a821c9654fa Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 30 Jan 2024 00:43:36 +0100 Subject: [PATCH 0024/2895] :arrow_up: Update ggerganov/llama.cpp (#1656) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 14c7cdae..525fb32a 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=6db2b41a76ee78d5efdd5c3cddd5d7ad3f646855 +CPPLLAMA_VERSION?=2aed77eb06a329f0d82bb1c467f4244904d4073f # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 555bc0266530ceaa4edb3624fe970c88c497ffab Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 30 Jan 2024 11:36:20 +0100 Subject: [PATCH 0025/2895] Update codellama-7b.yaml Signed-off-by: Ettore Di Giacinto --- embedded/models/codellama-7b.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/embedded/models/codellama-7b.yaml b/embedded/models/codellama-7b.yaml index 151f7515..d9b5c62c 100644 --- a/embedded/models/codellama-7b.yaml +++ b/embedded/models/codellama-7b.yaml @@ -1,5 +1,6 @@ name: codellama-7b backend: transformers +type: AutoModelForCausalLM parameters: model: codellama/CodeLlama-7b-hf temperature: 0.2 From 16cebf0390927929380961cbbd8397d1c8008afd Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 31 Jan 2024 00:38:05 +0100 Subject: [PATCH 0026/2895] :arrow_up: Update ggerganov/llama.cpp (#1665) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 525fb32a..89c54985 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=2aed77eb06a329f0d82bb1c467f4244904d4073f +CPPLLAMA_VERSION?=e0085fdf7c758f0bc2746fc106fb29dd9df959de # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 1c57f8d07704aa6449b7c2c44ecf436e47af4756 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 1 Feb 2024 19:21:52 +0100 Subject: [PATCH 0027/2895] feat(sycl): Add support for Intel GPUs with sycl (#1647) (#1660) * feat(sycl): Add sycl support (#1647) * onekit: install without prompts * set cmake args only in grpc-server Signed-off-by: Ettore Di Giacinto * cleanup * fixup sycl source env * Cleanup docs * ci: runs on self-hosted * fix typo * bump llama.cpp * llama.cpp: update server * adapt to upstream changes * adapt to upstream changes * docs: add sycl --------- Signed-off-by: Ettore Di Giacinto --- .github/workflows/image-pr.yml | 7 + .github/workflows/image.yml | 28 + Dockerfile | 9 +- Makefile | 2 +- backend/cpp/llama/CMakeLists.txt | 2 +- backend/cpp/llama/Makefile | 15 + backend/cpp/llama/grpc-server.cpp | 987 +++++------------- backend/cpp/llama/utils.hpp | 510 +++++++++ .../content/docs/features/GPU-acceleration.md | 78 +- docs/content/docs/getting-started/build.md | 13 +- docs/content/docs/overview.md | 22 - docs/content/docs/reference/architecture.md | 25 + entrypoint.sh | 4 + 13 files changed, 932 insertions(+), 770 deletions(-) create mode 100644 backend/cpp/llama/utils.hpp create mode 100644 docs/content/docs/reference/architecture.md diff --git a/.github/workflows/image-pr.yml 
b/.github/workflows/image-pr.yml index c95608c7..8dd699f5 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -75,6 +75,13 @@ jobs: ffmpeg: 'true' image-type: 'core' runs-on: 'ubuntu-latest' + - build-type: 'sycl_f16' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: 'sycl-f16-ffmpeg-core' + ffmpeg: 'true' + image-type: 'core' + runs-on: 'arc-runner-set' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index ad13ce05..69c7311f 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -122,6 +122,34 @@ jobs: ffmpeg: 'true' image-type: 'core' runs-on: 'ubuntu-latest' + - build-type: 'sycl_f16' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: 'sycl-f16-core' + ffmpeg: 'false' + image-type: 'core' + runs-on: 'arc-runner-set' + - build-type: 'sycl_f32' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: 'sycl-f32-core' + ffmpeg: 'false' + image-type: 'core' + runs-on: 'arc-runner-set' + - build-type: 'sycl_f16' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: 'sycl-f16-ffmpeg-core' + ffmpeg: 'true' + image-type: 'core' + runs-on: 'arc-runner-set' + - build-type: 'sycl_f32' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: 'sycl-f32-ffmpeg-core' + ffmpeg: 'true' + image-type: 'core' + runs-on: 'arc-runner-set' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" diff --git a/Dockerfile b/Dockerfile index ab63d442..f81b5ee3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,6 @@ ARG GO_VERSION=1.21-bullseye ARG IMAGE_TYPE=extras # extras or core - FROM golang:$GO_VERSION as requirements-core ARG BUILD_TYPE @@ -38,6 +37,14 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ apt-get update && \ apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \ ; fi + +# oneapi requirements +RUN if [ "${BUILD_TYPE}" = "sycl_f16" ] || [ "${BUILD_TYPE}" = "sycl_f32" ]; then \ + wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/163da6e4-56eb-4948-aba3-debcec61c064/l_BaseKit_p_2024.0.1.46_offline.sh && \ + sh ./l_BaseKit_p_2024.0.1.46_offline.sh -a -s --eula accept && \ + rm -rf l_BaseKit_p_2024.0.1.46_offline.sh \ + ; fi + ENV PATH /usr/local/cuda/bin:${PATH} # OpenBLAS requirements and stable diffusion diff --git a/Makefile b/Makefile index 89c54985..ae0babaa 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=e0085fdf7c758f0bc2746fc106fb29dd9df959de +CPPLLAMA_VERSION?=1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all diff --git a/backend/cpp/llama/CMakeLists.txt b/backend/cpp/llama/CMakeLists.txt index 7caa10cd..8299705a 100644 --- a/backend/cpp/llama/CMakeLists.txt +++ b/backend/cpp/llama/CMakeLists.txt @@ -70,7 +70,7 @@ add_library(hw_grpc_proto ${hw_proto_srcs} ${hw_proto_hdrs} ) -add_executable(${TARGET} grpc-server.cpp json.hpp ) +add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp) target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto absl::flags_parse gRPC::${_REFLECTION} diff --git 
a/backend/cpp/llama/Makefile b/backend/cpp/llama/Makefile index a64ee1b4..b050b620 100644 --- a/backend/cpp/llama/Makefile +++ b/backend/cpp/llama/Makefile @@ -3,6 +3,7 @@ LLAMA_VERSION?= CMAKE_ARGS?= BUILD_TYPE?= +ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh # If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically ifeq ($(BUILD_TYPE),cublas) @@ -19,6 +20,14 @@ else ifeq ($(BUILD_TYPE),hipblas) CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON endif +ifeq ($(BUILD_TYPE),sycl_f16) + CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON +endif + +ifeq ($(BUILD_TYPE),sycl_f32) + CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx +endif + llama.cpp: git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp if [ -z "$(LLAMA_VERSION)" ]; then \ @@ -31,6 +40,7 @@ llama.cpp/examples/grpc-server: cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/ cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/ cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/ + cp -rfv $(abspath ./)/utils.hpp llama.cpp/examples/grpc-server/ echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt ## XXX: In some versions of CMake clip wasn't being built before llama. ## This is an hack for now, but it should be fixed in the future. @@ -49,5 +59,10 @@ clean: rm -rf grpc-server grpc-server: llama.cpp llama.cpp/examples/grpc-server +ifneq (,$(findstring sycl,$(BUILD_TYPE))) + bash -c "source $(ONEAPI_VARS); \ + cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release" +else cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release +endif cp llama.cpp/build/bin/grpc-server . \ No newline at end of file diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 76a82a33..35ca6ea5 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -19,6 +19,7 @@ #include "grammar-parser.h" #include "backend.pb.h" #include "backend.grpc.pb.h" +#include "utils.hpp" // include std::regex #include @@ -30,6 +31,7 @@ #include #include #include +#include using grpc::Server; using grpc::ServerBuilder; @@ -42,205 +44,19 @@ using backend::HealthMessage; ///// LLAMA.CPP server code below -#define DEFAULT_OAICOMPAT_MODEL "gpt-3.5-turbo-0613" - using json = nlohmann::json; struct server_params { std::string hostname = "127.0.0.1"; - std::string api_key; + std::vector api_keys; std::string public_path = "examples/server/public"; int32_t port = 8080; int32_t read_timeout = 600; int32_t write_timeout = 600; }; -static bool server_verbose = false; - -#if SERVER_VERBOSE != 1 -#define LOG_VERBOSE(MSG, ...) -#else -#define LOG_VERBOSE(MSG, ...) \ - do \ - { \ - if (server_verbose) \ - { \ - server_log("VERBOSE", __func__, __LINE__, MSG, __VA_ARGS__); \ - } \ - } while (0) -#endif - -#define LOG_ERROR( MSG, ...) server_log("ERROR", __func__, __LINE__, MSG, __VA_ARGS__) -#define LOG_WARNING(MSG, ...) server_log("WARNING", __func__, __LINE__, MSG, __VA_ARGS__) -#define LOG_INFO( MSG, ...) 
server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__) - -json oaicompat_completion_params_parse(const json &body); -std::string format_chatml(std::vector messages); - - -// -// base64 utils (TODO: move to common in the future) -// - -static const std::string base64_chars = - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; - -static inline bool is_base64(uint8_t c) -{ - return (isalnum(c) || (c == '+') || (c == '/')); -} - -static std::vector base64_decode(const std::string & encoded_string) -{ - int i = 0; - int j = 0; - int in_ = 0; - - int in_len = encoded_string.size(); - - uint8_t char_array_4[4]; - uint8_t char_array_3[3]; - - std::vector ret; - - while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_])) - { - char_array_4[i++] = encoded_string[in_]; in_++; - if (i == 4) - { - for (i = 0; i <4; i++) - { - char_array_4[i] = base64_chars.find(char_array_4[i]); - } - - char_array_3[0] = ((char_array_4[0] ) << 2) + ((char_array_4[1] & 0x30) >> 4); - char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); - char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; - - for (i = 0; (i < 3); i++) - { - ret.push_back(char_array_3[i]); - } - i = 0; - } - } - - if (i) - { - for (j = i; j <4; j++) - { - char_array_4[j] = 0; - } - - for (j = 0; j <4; j++) - { - char_array_4[j] = base64_chars.find(char_array_4[j]); - } - - char_array_3[0] = ((char_array_4[0] ) << 2) + ((char_array_4[1] & 0x30) >> 4); - char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); - char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; - - for (j = 0; (j < i - 1); j++) - { - ret.push_back(char_array_3[j]); - } - } - - return ret; -} - -// -// parallel -// - -enum task_type { - TASK_TYPE_COMPLETION, - TASK_TYPE_CANCEL, -}; - -struct task_server { - int id; - int target_id; - task_type type; - json data; - bool infill_mode = false; - bool embedding_mode = false; - int multitask_id = -1; -}; - -struct task_result { - int id; - int multitask_id = -1; - bool stop; - bool error; - json result_json; -}; - -struct task_multi { - int id; - std::set subtasks_remaining{}; - std::vector results{}; -}; - -// TODO: can become bool if we can't find use of more states -enum slot_state -{ - IDLE, - PROCESSING, -}; - -enum slot_command -{ - NONE, - LOAD_PROMPT, - RELEASE, -}; - -struct slot_params -{ - bool stream = true; - bool cache_prompt = false; // remember the prompt to avoid reprocessing all prompt - - uint32_t seed = -1; // RNG seed - int32_t n_keep = 0; // number of tokens to keep from initial prompt - int32_t n_predict = -1; // new tokens to predict - - std::vector antiprompt; - - json input_prefix; - json input_suffix; -}; - -struct slot_image -{ - int32_t id; - - bool request_encode_image = false; - float * image_embedding = nullptr; - int32_t image_tokens = 0; - - clip_image_u8 * img_data; - - std::string prefix_prompt; // before of this image -}; - -// completion token output with probabilities -struct completion_token_output -{ - struct token_prob - { - llama_token tok; - float prob; - }; - - std::vector probs; - llama_token tok; - std::string text_to_send; -}; +bool server_verbose = false; static size_t common_part(const std::vector &a, const std::vector &b) { @@ -296,28 +112,6 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end) return ret; } -static void server_log(const char *level, const char *function, int line, - const char *message, const 
nlohmann::ordered_json &extra) -{ - nlohmann::ordered_json log - { - {"timestamp", time(nullptr)}, - {"level", level}, - {"function", function}, - {"line", line}, - {"message", message}, - }; - - if (!extra.empty()) - { - log.merge_patch(extra); - } - - const std::string str = log.dump(-1, ' ', false, json::error_handler_t::replace); - printf("%.*s\n", (int)str.size(), str.data()); - fflush(stdout); -} - // format incomplete utf-8 multibyte character for output static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token) { @@ -359,15 +153,6 @@ static json probs_vector_to_json(const llama_context *ctx, const std::vector -static T json_value(const json &body, const std::string &key, const T &default_value) -{ - // Fallback null to default value - return body.contains(key) && !body.at(key).is_null() - ? body.value(key, default_value) - : default_value; -} - struct llama_client_slot { int id; @@ -414,6 +199,12 @@ struct llama_client_slot struct llama_sampling_params sparams; llama_sampling_context *ctx_sampling = nullptr; + int32_t ga_i = 0; // group-attention state + int32_t ga_n = 1; // group-attention factor + int32_t ga_w = 512; // group-attention width + + int32_t n_past_se = 0; // self-extend + // multimodal std::vector images; @@ -442,6 +233,8 @@ struct llama_client_slot sent_count = 0; sent_token_probs_index = 0; infill = false; + ga_i = 0; + n_past_se = 0; generated_token_probs.clear(); @@ -462,7 +255,9 @@ struct llama_client_slot { return true; // limitless } + n_remaining = -1; + if (params.n_predict != -1) { n_remaining = params.n_predict - n_decoded; @@ -471,6 +266,7 @@ struct llama_client_slot { n_remaining = global_params.n_predict - n_decoded; } + return n_remaining > 0; // no budget } @@ -492,7 +288,7 @@ struct llama_client_slot } void release() { - if (state == IDLE || state == PROCESSING) + if (state == PROCESSING) { t_token_generation = (ggml_time_us() - t_start_genereration) / 1e3; command = RELEASE; @@ -540,7 +336,6 @@ struct llama_server_context bool all_slots_are_idle = false; bool add_bos_token = true; - int32_t id_gen; int32_t n_ctx; // total context for all clients / slots // system prompt @@ -555,13 +350,8 @@ struct llama_server_context // slots / clients std::vector slots; - std::vector queue_tasks; - std::vector queue_results; - std::vector queue_multitasks; - std::mutex mutex_tasks; // also guards id_gen, and queue_multitasks - std::condition_variable condition_tasks; - std::mutex mutex_results; - std::condition_variable condition_results; + llama_server_queue queue_tasks; + llama_server_response queue_results; ~llama_server_context() { @@ -620,8 +410,6 @@ struct llama_server_context } void initialize() { - id_gen = 0; - // create slots all_slots_are_idle = true; @@ -634,9 +422,26 @@ struct llama_server_context slot.id = i; slot.n_ctx = n_ctx_slot; - slot.reset(); LOG_TEE(" -> Slot %i - max context: %i\n", slot.id, n_ctx_slot); + + const int ga_n = params.grp_attn_n; + const int ga_w = params.grp_attn_w; + + if (ga_n != 1) { + GGML_ASSERT(ga_n > 0 && "ga_n must be positive"); // NOLINT + GGML_ASSERT(ga_w % ga_n == 0 && "ga_w must be a multiple of ga_n"); // NOLINT + //GGML_ASSERT(n_ctx_train % ga_w == 0 && "n_ctx_train must be a multiple of ga_w"); // NOLINT + //GGML_ASSERT(n_ctx >= n_ctx_train * ga_n && "n_ctx must be at least n_ctx_train * ga_n"); // NOLINT + LOG_TEE(" -> Slot %i - self-extend: ga_n = %d, ga_w = %d\n", slot.id, ga_n, ga_w); + } + + slot.ga_i = 0; + slot.ga_n = ga_n; + slot.ga_w = ga_w; + + 
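The slot initialization above guards the group-attention (self-extend) parameters with assertions: the factor must be positive and the window width an exact multiple of it. A minimal Go sketch of the same constraints, using illustrative names:

```go
package main

import "fmt"

// validateGroupAttention mirrors the assertions added in initialize():
// the group-attention factor ga_n must be positive, and the window
// width ga_w must be an exact multiple of ga_n.
func validateGroupAttention(gaN, gaW int) error {
	if gaN <= 0 {
		return fmt.Errorf("ga_n must be positive, got %d", gaN)
	}
	if gaW%gaN != 0 {
		return fmt.Errorf("ga_w (%d) must be a multiple of ga_n (%d)", gaW, gaN)
	}
	return nil
}

func main() {
	fmt.Println(validateGroupAttention(4, 512)) // ok: 512 is a multiple of 4
	fmt.Println(validateGroupAttention(4, 510)) // rejected
}
```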
slot.reset(); + slots.push_back(slot); } @@ -722,14 +527,6 @@ struct llama_server_context slot_params default_params; llama_sampling_params default_sparams; - if (data.count("__oaicompat") != 0) { - slot->oaicompat = true; - slot->oaicompat_model = json_value(data, "model", std::string(DEFAULT_OAICOMPAT_MODEL)); - } else { - slot->oaicompat = false; - slot->oaicompat_model = ""; - } - slot->params.stream = json_value(data, "stream", false); slot->params.cache_prompt = json_value(data, "cache_prompt", false); slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict); @@ -892,7 +689,7 @@ struct llama_server_context while ((pos = prompt.find(pattern, pos)) != std::string::npos) { size_t end_prefix = pos; pos += pattern.length(); - size_t end_pos = prompt.find("]", pos); + size_t end_pos = prompt.find(']', pos); if (end_pos != std::string::npos) { std::string image_id = prompt.substr(pos, end_pos - pos); @@ -1184,39 +981,13 @@ struct llama_server_context void send_error(task_server& task, const std::string &error) { LOG_TEE("task %i - error: %s\n", task.id, error.c_str()); - std::unique_lock lock(mutex_results); task_result res; res.id = task.id; res.multitask_id = task.multitask_id; res.stop = false; res.error = true; res.result_json = { { "content", error } }; - queue_results.push_back(res); - condition_results.notify_all(); - } - - void add_multi_task(int id, std::vector& sub_ids) - { - std::lock_guard lock(mutex_tasks); - task_multi multi; - multi.id = id; - std::copy(sub_ids.begin(), sub_ids.end(), std::inserter(multi.subtasks_remaining, multi.subtasks_remaining.end())); - queue_multitasks.push_back(multi); - condition_tasks.notify_one(); - } - - void update_multi_task(int multitask_id, int subtask_id, task_result& result) - { - std::lock_guard lock(mutex_tasks); - for (auto& multitask : queue_multitasks) - { - if (multitask.id == multitask_id) - { - multitask.subtasks_remaining.erase(subtask_id); - multitask.results.push_back(result); - condition_tasks.notify_one(); - } - } + queue_results.send(res); } json get_model_props() @@ -1262,7 +1033,6 @@ struct llama_server_context void send_partial_response(llama_client_slot &slot, completion_token_output tkn) { - std::unique_lock lock(mutex_results); task_result res; res.id = slot.task_id; res.multitask_id = slot.multitask_id; @@ -1297,13 +1067,11 @@ struct llama_server_context res.result_json["model"] = slot.oaicompat_model; } - queue_results.push_back(res); - condition_results.notify_all(); + queue_results.send(res); } void send_final_response(llama_client_slot &slot) { - std::unique_lock lock(mutex_results); task_result res; res.id = slot.task_id; res.multitask_id = slot.multitask_id; @@ -1351,23 +1119,12 @@ struct llama_server_context res.result_json["oaicompat_token_ctr"] = slot.n_decoded; res.result_json["model"] = slot.oaicompat_model; } - queue_results.push_back(res); - condition_results.notify_all(); - - // done with results, unlock - lock.unlock(); - - // parent multitask, if any, needs to be updated - if (slot.multitask_id != -1) - { - update_multi_task(slot.multitask_id, slot.task_id, res); - } + queue_results.send(res); } void send_embedding(llama_client_slot &slot) { - std::unique_lock lock(mutex_results); task_result res; res.id = slot.task_id; res.multitask_id = slot.multitask_id; @@ -1394,15 +1151,13 @@ struct llama_server_context {"embedding", embedding }, }; } - queue_results.push_back(res); - condition_results.notify_all(); + queue_results.send(res); } - int request_completion(json data, bool infill, 
bool embedding, int multitask_id) + void request_completion(int task_id, json data, bool infill, bool embedding, int multitask_id) { - std::unique_lock lock(mutex_tasks); task_server task; - task.id = id_gen++; + task.id = task_id; task.target_id = 0; task.data = std::move(data); task.infill_mode = infill; @@ -1413,47 +1168,11 @@ struct llama_server_context // when a completion task's prompt array is not a singleton, we split it into multiple requests if (task.data.count("prompt") && task.data.at("prompt").size() > 1) { - lock.unlock(); // entering new func scope - return split_multiprompt_task(task); + split_multiprompt_task(task_id, task); } // otherwise, it's a single-prompt task, we actually queue it - queue_tasks.push_back(task); - condition_tasks.notify_one(); - return task.id; - } - - task_result next_result(int task_id) - { - while (true) - { - std::unique_lock lock(mutex_results); - condition_results.wait(lock, [&]{ - return !queue_results.empty(); - }); - - for (int i = 0; i < (int) queue_results.size(); i++) - { - // for now, tasks that have associated parent multitasks just get erased once multitask picks up the result - if (queue_results[i].multitask_id == task_id) - { - update_multi_task(task_id, queue_results[i].id, queue_results[i]); - queue_results.erase(queue_results.begin() + i); - continue; - } - - if (queue_results[i].id == task_id) - { - assert(queue_results[i].multitask_id == -1); - task_result res = queue_results[i]; - queue_results.erase(queue_results.begin() + i); - return res; - } - } - } - - // never reached - //return task_result{-1, false, false, {}}; + queue_tasks.post(task); } // for multiple images processing @@ -1516,7 +1235,7 @@ struct llama_server_context std::vector append_tokens = tokenize(json_prompt, false); // has next image for (int i = 0; i < (int) append_tokens.size(); ++i) { - llama_batch_add(batch, append_tokens[i], slot.n_past, { slot.id }, true); + llama_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true); slot.n_past += 1; } } @@ -1526,146 +1245,119 @@ struct llama_server_context void request_cancel(int task_id) { - std::unique_lock lock(mutex_tasks); task_server task; - task.id = id_gen++; task.type = TASK_TYPE_CANCEL; task.target_id = task_id; - queue_tasks.push_back(task); - condition_tasks.notify_one(); + queue_tasks.post(task); } - int split_multiprompt_task(task_server& multiprompt_task) + void split_multiprompt_task(int multitask_id, task_server& multiprompt_task) { int prompt_count = multiprompt_task.data.at("prompt").size(); assert(prompt_count > 1); - int multitask_id = id_gen++; + // generate all the ID for subtask std::vector subtask_ids(prompt_count); for (int i = 0; i < prompt_count; i++) + { + subtask_ids[i] = queue_tasks.get_new_id(); + } + + // queue up the multitask so we can track its subtask progression + queue_tasks.add_multitask(multitask_id, subtask_ids); + + // add subtasks + for (int i = 0; i < prompt_count; i++) { json subtask_data = multiprompt_task.data; subtask_data["prompt"] = subtask_data["prompt"][i]; // subtasks inherit everything else (infill mode, embedding mode, etc.) 
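The reworked split_multiprompt_task reserves an id for every subtask first, registers the multitask so its progression can be tracked, and only then queues the subtasks, so results can never arrive for an unregistered multitask. A condensed Go model of that ordering (all names here are illustrative, not the actual API):

```go
package main

import "fmt"

type task struct {
	id     int
	prompt string
}

type scheduler struct {
	nextID     int
	queue      []task
	multitasks map[int][]int // multitask id -> subtask ids being tracked
}

func (s *scheduler) newID() int { id := s.nextID; s.nextID++; return id }

// splitMultiprompt reserves every subtask id up front, registers the
// multitask for progress tracking, and only then posts the subtasks.
func (s *scheduler) splitMultiprompt(multitaskID int, prompts []string) {
	ids := make([]int, len(prompts))
	for i := range prompts {
		ids[i] = s.newID()
	}
	s.multitasks[multitaskID] = ids
	for i, p := range prompts {
		s.queue = append(s.queue, task{id: ids[i], prompt: p})
	}
}

func main() {
	s := &scheduler{multitasks: map[int][]int{}}
	s.splitMultiprompt(s.newID(), []string{"first prompt", "second prompt"})
	fmt.Printf("queued %d subtasks, tracked as %v\n", len(s.queue), s.multitasks)
}
```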
- subtask_ids[i] = request_completion(subtask_data, multiprompt_task.infill_mode, multiprompt_task.embedding_mode, multitask_id); + request_completion(subtask_ids[i], subtask_data, multiprompt_task.infill_mode, multiprompt_task.embedding_mode, multitask_id); } - - // queue up the multitask so we can track its subtask progression - add_multi_task(multitask_id, subtask_ids); - return multitask_id; } - void process_tasks() + void process_single_task(task_server& task) { - std::unique_lock lock(mutex_tasks); - std::vector deferred_tasks; - while (!queue_tasks.empty()) + switch (task.type) { - task_server task = queue_tasks.front(); - queue_tasks.erase(queue_tasks.begin()); - switch (task.type) - { - case TASK_TYPE_COMPLETION: { - llama_client_slot *slot = get_slot(json_value(task.data, "slot_id", -1)); - if (slot == nullptr) - { - // if no slot is available, we defer this task for processing later - deferred_tasks.push_back(task); - break; - } - - if (task.data.contains("system_prompt")) - { - if (!all_slots_are_idle) { - send_error(task, "system prompt can only be updated when all slots are idle"); - break; - } - process_system_prompt_data(task.data["system_prompt"]); - // reset cache_tokens for all slots - for (llama_client_slot &slot : slots) - { - slot.cache_tokens.clear(); - } - } - - slot->reset(); - - slot->infill = task.infill_mode; - slot->embedding = task.embedding_mode; - slot->task_id = task.id; - slot->multitask_id = task.multitask_id; - - if (!launch_slot_with_data(slot, task.data)) - { - // send error result - send_error(task, "internal_error"); - break; - } - } break; - case TASK_TYPE_CANCEL: { // release slot linked with the task id - for (auto & slot : slots) - { - if (slot.task_id == task.target_id) - { - slot.release(); - break; - } - } - } break; - } - } - - // add all the deferred tasks back the the queue - for (task_server &task : deferred_tasks) - { - queue_tasks.push_back(task); - } - - // remove finished multitasks from the queue of multitasks, and add the corresponding result to the result queue - std::vector agg_results; - auto queue_iterator = queue_multitasks.begin(); - while (queue_iterator != queue_multitasks.end()) - { - if (queue_iterator->subtasks_remaining.empty()) - { - // all subtasks done == multitask is done - task_result aggregate_result; - aggregate_result.id = queue_iterator->id; - aggregate_result.stop = true; - aggregate_result.error = false; - - // collect json results into one json result - std::vector result_jsons; - for (auto& subres : queue_iterator->results) + case TASK_TYPE_COMPLETION: { + llama_client_slot *slot = get_slot(json_value(task.data, "slot_id", -1)); + if (slot == nullptr) { - result_jsons.push_back(subres.result_json); - aggregate_result.error = aggregate_result.error && subres.error; + // if no slot is available, we defer this task for processing later + LOG_VERBOSE("no slot is available", {}); + queue_tasks.defer(task); + break; } - aggregate_result.result_json = json{ "results", result_jsons }; - agg_results.push_back(aggregate_result); - condition_results.notify_all(); + if (task.data.contains("system_prompt")) + { + if (!all_slots_are_idle) { + send_error(task, "system prompt can only be updated when all slots are idle"); + break; + } + process_system_prompt_data(task.data["system_prompt"]); - queue_iterator = queue_multitasks.erase(queue_iterator); - } - else - { - ++queue_iterator; - } + // reset cache_tokens for all slots + for (llama_client_slot &slot : slots) + { + slot.cache_tokens.clear(); + slot.n_past = 0; + 
slot.n_past_se = 0; + } + } + + slot->reset(); + + slot->infill = task.infill_mode; + slot->embedding = task.embedding_mode; + slot->task_id = task.id; + slot->multitask_id = task.multitask_id; + + if (!launch_slot_with_data(slot, task.data)) + { + // send error result + send_error(task, "internal_error"); + break; + } + } break; + case TASK_TYPE_CANCEL: { // release slot linked with the task id + for (auto & slot : slots) + { + if (slot.task_id == task.target_id) + { + slot.release(); + break; + } + } + } break; + case TASK_TYPE_NEXT_RESPONSE: { + // do nothing + } break; } - // done with tasks, unlock - lock.unlock(); + } - // copy aggregate results of complete multi-tasks to the results queue - std::lock_guard lock_results(mutex_results); - queue_results.insert(queue_results.end(), agg_results.begin(), agg_results.end()); + void on_finish_multitask(task_multi& multitask) + { + // all subtasks done == multitask is done + task_result result; + result.id = multitask.id; + result.stop = true; + result.error = false; + + // collect json results into one json result + std::vector result_jsons; + for (auto& subres : multitask.results) + { + result_jsons.push_back(subres.result_json); + result.error = result.error && subres.error; + } + result.result_json = json{ { "results", result_jsons } }; + queue_results.send(result); } bool update_slots() { - // attend tasks - process_tasks(); - if (system_need_update) { LOG_TEE("updating system prompt\n"); @@ -1681,40 +1373,45 @@ struct llama_server_context LOG_TEE("all slots are idle and system prompt is empty, clear the KV cache\n"); kv_cache_clear(); } - std::unique_lock lock(mutex_tasks); - condition_tasks.wait(lock, [&]{ - return !queue_tasks.empty(); - }); + return true; } + task_server task; + task.type = TASK_TYPE_NEXT_RESPONSE; + task.target_id = -1; + queue_tasks.post(task); + for (llama_client_slot &slot : slots) { - if (slot.is_processing() && slot.cache_tokens.size() >= (size_t) slot.n_ctx) + if (slot.ga_n == 1) { - // Shift context - const int n_left = slot.n_past - slot.params.n_keep - 1; - const int n_discard = n_left / 2; - - LOG_TEE("slot %d: context shift - n_keep = %d, n_left = %d, n_discard = %d\n", slot.id, slot.params.n_keep, n_left, n_discard); - llama_kv_cache_seq_rm (ctx, slot.id, slot.params.n_keep + 1 , slot.params.n_keep + n_discard + 1); - llama_kv_cache_seq_shift(ctx, slot.id, slot.params.n_keep + 1 + n_discard, slot.n_past, -n_discard); - - for (size_t i = slot.params.n_keep + 1 + n_discard; i < slot.cache_tokens.size(); i++) + if (slot.is_processing() && system_tokens.size() + slot.cache_tokens.size() >= (size_t) slot.n_ctx) { - slot.cache_tokens[i - n_discard] = slot.cache_tokens[i]; + // Shift context + const int n_left = system_tokens.size() + slot.n_past - slot.params.n_keep - 1; + const int n_discard = n_left / 2; + + LOG_TEE("slot %d: context shift - n_keep = %d, n_left = %d, n_discard = %d\n", slot.id, slot.params.n_keep, n_left, n_discard); + llama_kv_cache_seq_rm (ctx, slot.id, slot.params.n_keep + 1 , slot.params.n_keep + n_discard + 1); + llama_kv_cache_seq_shift(ctx, slot.id, slot.params.n_keep + 1 + n_discard, system_tokens.size() + slot.n_past, -n_discard); + + for (size_t i = slot.params.n_keep + 1 + n_discard; i < slot.cache_tokens.size(); i++) + { + slot.cache_tokens[i - n_discard] = slot.cache_tokens[i]; + } + + slot.cache_tokens.resize(slot.cache_tokens.size() - n_discard); + + slot.n_past -= n_discard; + + slot.truncated = true; + + LOG_VERBOSE("context shift", { + { "n_ctx", n_ctx }, + { "n_keep", 
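on_finish_multitask above folds the subtask results into a single response once the last subtask reports. Roughly, in Go (a sketch; the field names are illustrative, and the error flag is OR-ed here, which is the intent of the aggregation):

```go
package main

import "fmt"

type taskResult struct {
	err     bool
	content string
}

// aggregate mirrors the shape of on_finish_multitask: once every
// subtask has reported, the partial results are folded into one
// slice and a combined error flag is produced.
func aggregate(results []taskResult) (contents []string, anyErr bool) {
	for _, r := range results {
		contents = append(contents, r.content)
		anyErr = anyErr || r.err
	}
	return contents, anyErr
}

func main() {
	parts, failed := aggregate([]taskResult{{content: "a"}, {content: "b"}})
	fmt.Println(parts, failed)
}
```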
params.n_keep }, + { "n_left", n_left }, + }); } - - slot.cache_tokens.resize(slot.cache_tokens.size() - n_discard); - - slot.n_past -= n_discard; - - slot.truncated = true; - - LOG_VERBOSE("context shift", { - {"n_ctx", n_ctx}, - {"n_keep", params.n_keep}, - {"n_left", n_left}, - }); } } @@ -1729,6 +1426,7 @@ struct llama_server_context slot.t_last_used = ggml_time_us(); LOG_TEE("slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size()); + queue_tasks.notify_slot_changed(); continue; } @@ -1740,8 +1438,11 @@ struct llama_server_context slot.i_batch = batch.n_tokens; - llama_batch_add(batch, slot.sampled, system_tokens.size() + slot.n_past, { slot.id }, true); + const int32_t slot_npast = slot.n_past_se > 0 ? slot.n_past_se : slot.n_past; + // TODO: we always have to take into account the "system_tokens" + // this is not great and needs to be improved somehow + llama_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true); slot.n_past += 1; } @@ -1792,8 +1493,8 @@ struct llama_server_context prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(model)); prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(model)); // always add BOS - prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(model)); - prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end()); + prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(model)); + prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end()); prefix_tokens.push_back(llama_token_middle(model)); prompt_tokens = prefix_tokens; } @@ -1838,6 +1539,8 @@ struct llama_server_context llama_sampling_reset(slot.ctx_sampling); slot.n_past = 0; + slot.n_past_se = 0; + slot.ga_i = 0; slot.num_prompt_tokens_processed = slot.num_prompt_tokens; } else @@ -1851,6 +1554,25 @@ struct llama_server_context slot.n_past = common_part(slot.cache_tokens, prompt_tokens); slot.num_prompt_tokens_processed = slot.num_prompt_tokens - slot.n_past; + if (slot.ga_n != 1) + { + int ga_i = 0; + int32_t ga_n = slot.ga_n; + int32_t ga_w = slot.ga_w; + int32_t slot_npast = 0; + for (int k = 0; k < slot.n_past; ++k) + { + while (slot_npast >= ga_i + ga_w) { + const int bd = (ga_w/ga_n)*(ga_n - 1); + slot_npast -= bd; + ga_i += ga_w/ga_n; + } + slot_npast++; + } + slot.n_past_se = slot_npast; + slot.ga_i = ga_i; + } + LOG_TEE("slot %d : in cache: %i tokens | to process: %i tokens\n", slot.id, slot.n_past, slot.num_prompt_tokens_processed); } @@ -1865,11 +1587,15 @@ struct llama_server_context // we have to evaluate at least 1 token to generate logits. LOG_TEE("slot %d : we have to evaluate at least 1 token to generate logits\n", slot.id); slot.n_past--; + if (slot.ga_i > 0) + { + slot.n_past_se--; + } } LOG_VERBOSE("prompt ingested", { - {"n_past", slot.n_past}, - {"cached", tokens_to_str(ctx, slot.cache_tokens.cbegin(), slot.cache_tokens.cbegin() + slot.n_past)}, + {"n_past", slot.n_past}, + {"cached", tokens_to_str(ctx, slot.cache_tokens.cbegin(), slot.cache_tokens.cbegin() + slot.n_past)}, {"to_eval", tokens_to_str(ctx, slot.cache_tokens.cbegin() + slot.n_past, slot.cache_tokens.cend())}, }); @@ -1877,9 +1603,25 @@ struct llama_server_context // process the prefix of first image std::vector prefix_tokens = has_images ? tokenize(slot.images[0].prefix_prompt, add_bos_token) : prompt_tokens; + + int32_t slot_npast = slot.n_past_se > 0 ? 
slot.n_past_se : slot.n_past; + + int32_t ga_i = slot.ga_i; + int32_t ga_n = slot.ga_n; + int32_t ga_w = slot.ga_w; + for (; slot.n_past < (int) prefix_tokens.size(); ++slot.n_past) { - llama_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot.n_past, { slot.id }, false); + if (slot.ga_n != 1) + { + while (slot_npast >= ga_i + ga_w) { + const int bd = (ga_w/ga_n)*(ga_n - 1); + slot_npast -= bd; + ga_i += ga_w/ga_n; + } + } + llama_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, {slot.id }, false); + slot_npast++; } if (has_images && !ingest_images(slot, n_batch)) @@ -1909,6 +1651,37 @@ struct llama_server_context for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch) { const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i)); + + for (auto & slot : slots) + { + if (slot.ga_n != 1) + { + // context extension via Self-Extend + while (slot.n_past_se >= slot.ga_i + slot.ga_w) + { + const int ib = (slot.ga_n * slot.ga_i) / slot.ga_w; + const int bd = (slot.ga_w / slot.ga_n) * (slot.ga_n - 1); + const int dd = (slot.ga_w / slot.ga_n) - ib * bd - slot.ga_w; + + LOG_TEE("\n"); + LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd); + LOG_TEE("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n); + LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd); + + llama_kv_cache_seq_shift(ctx, slot.id, slot.ga_i, slot.n_past_se, ib * bd); + llama_kv_cache_seq_div(ctx, slot.id, slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w,slot.ga_n); + llama_kv_cache_seq_shift(ctx, slot.id, slot.ga_i + ib * bd + slot.ga_w,slot.n_past_se + ib * bd, dd); + + slot.n_past_se -= bd; + + slot.ga_i += slot.ga_w / slot.ga_n; + + LOG_TEE("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i); + } + slot.n_past_se += n_tokens; + } + } + llama_batch batch_view = { n_tokens, @@ -1922,6 +1695,7 @@ struct llama_server_context }; const int ret = llama_decode(ctx, batch_view); + if (ret != 0) { if (n_batch == 1 || ret < 0) @@ -1994,242 +1768,13 @@ struct llama_server_context } return true; } + + void run_on_all_tasks_finished() { + update_slots(); + } }; - -static std::string random_string() -{ - static const std::string str("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); - - std::random_device rd; - std::mt19937 generator(rd()); - - std::string result(32, ' '); - - for (int i = 0; i < 32; ++i) { - result[i] = str[generator() % str.size()]; - } - - return result; -} - -static std::string gen_chatcmplid() -{ - std::stringstream chatcmplid; - chatcmplid << "chatcmpl-" << random_string(); - return chatcmplid.str(); -} - -std::string format_chatml(std::vector messages) -{ - std::ostringstream chatml_msgs; - - for (auto it = messages.begin(); it != messages.end(); ++it) { - chatml_msgs << "<|im_start|>" - << json_value(*it, "role", std::string("user")) << '\n'; - chatml_msgs << json_value(*it, "content", std::string("")) - << "<|im_end|>\n"; - } - - chatml_msgs << "<|im_start|>assistant" << '\n'; - - return chatml_msgs.str(); -} - /* llama.cpp completion api semantics */ -json oaicompat_completion_params_parse( - const json &body /* openai api json semantics 
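When a cached prompt is reused, the self-extend path replays the group-attention bookkeeping to recover the compressed position n_past_se. The arithmetic, lifted into a small Go function for clarity (a sketch of the loop above, not the C++ code itself):

```go
package main

import "fmt"

// compressedPast replays the group-attention bookkeeping used when a
// cached prompt is reused: walking one token at a time, the position is
// pulled back by (gaW/gaN)*(gaN-1) whenever it passes the current
// window [gaI, gaI+gaW).
func compressedPast(nPast, gaN, gaW int) (nPastSe, gaI int) {
	for k := 0; k < nPast; k++ {
		for nPastSe >= gaI+gaW {
			bd := (gaW / gaN) * (gaN - 1)
			nPastSe -= bd
			gaI += gaW / gaN
		}
		nPastSe++
	}
	return nPastSe, gaI
}

func main() {
	se, gi := compressedPast(2048, 4, 512)
	fmt.Println(se, gi) // compressed position and window origin
}
```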
*/) -{ - json llama_params; - - llama_params["__oaicompat"] = true; - - // Map OpenAI parameters to llama.cpp parameters - // - // For parameters that are defined by the OpenAI documentation (e.g. - // temperature), we explicitly specify OpenAI's intended default; we - // need to do that because sometimes OpenAI disagrees with llama.cpp - // - // https://platform.openai.com/docs/api-reference/chat/create - llama_sampling_params default_sparams; - llama_params["model"] = json_value(body, "model", std::string("unknown")); - llama_params["prompt"] = format_chatml(body["messages"]); // OpenAI 'messages' to llama.cpp 'prompt' - llama_params["cache_prompt"] = json_value(body, "cache_prompt", false); - llama_params["temperature"] = json_value(body, "temperature", 0.0); - llama_params["top_k"] = json_value(body, "top_k", default_sparams.top_k); - llama_params["top_p"] = json_value(body, "top_p", 1.0); - llama_params["n_predict"] = json_value(body, "max_tokens", -1); - llama_params["logit_bias"] = json_value(body, "logit_bias",json::object()); - llama_params["frequency_penalty"] = json_value(body, "frequency_penalty", 0.0); - llama_params["presence_penalty"] = json_value(body, "presence_penalty", 0.0); - llama_params["seed"] = json_value(body, "seed", LLAMA_DEFAULT_SEED); - llama_params["stream"] = json_value(body, "stream", false); - llama_params["mirostat"] = json_value(body, "mirostat", default_sparams.mirostat); - llama_params["mirostat_tau"] = json_value(body, "mirostat_tau", default_sparams.mirostat_tau); - llama_params["mirostat_eta"] = json_value(body, "mirostat_eta", default_sparams.mirostat_eta); - llama_params["penalize_nl"] = json_value(body, "penalize_nl", default_sparams.penalize_nl); - llama_params["typical_p"] = json_value(body, "typical_p", default_sparams.typical_p); - llama_params["repeat_last_n"] = json_value(body, "repeat_last_n", default_sparams.penalty_last_n); - llama_params["ignore_eos"] = json_value(body, "ignore_eos", false); - llama_params["tfs_z"] = json_value(body, "tfs_z", default_sparams.tfs_z); - - if (body.count("grammar") != 0) { - llama_params["grammar"] = json_value(body, "grammar", json::object()); - } - - // Handle 'stop' field - if (body.contains("stop") && body["stop"].is_string()) { - llama_params["stop"] = json::array({body["stop"].get()}); - } else { - llama_params["stop"] = json_value(body, "stop", json::array()); - } - - // Ensure there is ChatML-specific end sequence among stop words - llama_params["stop"].push_back("<|im_end|>"); - - return llama_params; -} - -static json format_final_response_oaicompat(const json &request, const task_result &response, bool streaming = false) -{ - json result = response.result_json; - - bool stopped_word = result.count("stopped_word") != 0; - bool stopped_eos = json_value(result, "stopped_eos", false); - int num_tokens_predicted = json_value(result, "tokens_predicted", 0); - int num_prompt_tokens = json_value(result, "tokens_evaluated", 0); - std::string content = json_value(result, "content", std::string("")); - - std::string finish_reason = "length"; - if (stopped_word || stopped_eos) { - finish_reason = "stop"; - } - - json choices = - streaming ? 
json::array({json{{"finish_reason", finish_reason}, - {"index", 0}, - {"delta", json::object()}}}) - : json::array({json{{"finish_reason", finish_reason}, - {"index", 0}, - {"message", json{{"content", content}, - {"role", "assistant"}}}}}); - - std::time_t t = std::time(0); - - json res = - json{{"choices", choices}, - {"created", t}, - {"model", - json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))}, - {"object", streaming ? "chat.completion.chunk" : "chat.completion"}, - {"usage", - json{{"completion_tokens", num_tokens_predicted}, - {"prompt_tokens", num_prompt_tokens}, - {"total_tokens", num_tokens_predicted + num_prompt_tokens}}}, - {"id", gen_chatcmplid()}}; - - if (server_verbose) { - res["__verbose"] = result; - } - - if (result.contains("completion_probabilities")) { - res["completion_probabilities"] = json_value(result, "completion_probabilities", json::array()); - } - - return res; -} - -// return value is vector as there is one case where we might need to generate two responses -static std::vector format_partial_response_oaicompat(const task_result &response) { - json result = response.result_json; - - if (!result.contains("model") || !result.contains("oaicompat_token_ctr")) { - return std::vector({response.result_json}); - } - - bool first = json_value(result, "oaicompat_token_ctr", 0) == 0; - std::string modelname = json_value(result, "model", std::string(DEFAULT_OAICOMPAT_MODEL)); - - bool stopped_word = json_value(result, "stopped_word", false); - bool stopped_eos = json_value(result, "stopped_eos", false); - bool stopped_limit = json_value(result, "stopped_limit", false); - std::string content = json_value(result, "content", std::string("")); - - std::string finish_reason; - if (stopped_word || stopped_eos) { - finish_reason = "stop"; - } - if (stopped_limit) { - finish_reason = "length"; - } - - std::time_t t = std::time(0); - - json choices; - - if (!finish_reason.empty()) { - choices = json::array({json{{"finish_reason", finish_reason}, - {"index", 0}, - {"delta", json::object()}}}); - } else { - if (first) { - if (content.empty()) { - choices = json::array({json{{"finish_reason", nullptr}, - {"index", 0}, - {"delta", json{{"role", "assistant"}}}}}); - } else { - // We have to send this as two updates to conform to openai behavior - json initial_ret = json{{"choices", json::array({json{ - {"finish_reason", nullptr}, - {"index", 0}, - {"delta", json{ - {"role", "assistant"} - }}}})}, - {"created", t}, - {"id", gen_chatcmplid()}, - {"model", modelname}, - {"object", "chat.completion.chunk"}}; - - json second_ret = json{ - {"choices", json::array({json{{"finish_reason", nullptr}, - {"index", 0}, - {"delta", json{ - {"content", content}}} - }})}, - {"created", t}, - {"id", gen_chatcmplid()}, - {"model", modelname}, - {"object", "chat.completion.chunk"}}; - - return std::vector({initial_ret, second_ret}); - } - } else { - // Some idiosyncrasy in task processing logic makes several trailing calls - // with empty content, we ignore these at the calee site. 
- if (content.empty()) { - return std::vector({json::object()}); - } - - choices = json::array({json{ - {"finish_reason", nullptr}, - {"index", 0}, - {"delta", - json{ - {"content", content}, - }}, - }}); - } - } - - json ret = json{{"choices", choices}, - {"created", t}, - {"id", gen_chatcmplid()}, - {"model", modelname}, - {"object", "chat.completion.chunk"}}; - - return std::vector({ret}); -} - static json format_partial_response( llama_server_context &llama, llama_client_slot *slot, const std::string &content, const std::vector &probs ) { @@ -2301,12 +1846,20 @@ static void start_llama_server() { std::this_thread::sleep_for(std::chrono::milliseconds(100)); } - bool running = true; - while (running) - { - running = llama.update_slots(); - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - } + llama.queue_tasks.on_new_task(std::bind( + &llama_server_context::process_single_task, &llama, std::placeholders::_1)); + llama.queue_tasks.on_finish_multitask(std::bind( + &llama_server_context::on_finish_multitask, &llama, std::placeholders::_1)); + llama.queue_tasks.on_all_tasks_finished(std::bind( + &llama_server_context::run_on_all_tasks_finished, &llama)); + llama.queue_results.on_multitask_update(std::bind( + &llama_server_queue::update_multitask, + &llama.queue_tasks, + std::placeholders::_1, + std::placeholders::_2, + std::placeholders::_3 + )); + llama.queue_tasks.start_loop(); } json parse_options(bool streaming, const backend::PredictOptions* predict, llama_server_context &llama) @@ -2479,9 +2032,9 @@ static void params_parse(const backend::ModelOptions* request, std::sregex_token_iterator it{ arg_next.begin(), arg_next.end(), regex, -1 }; std::vector split_arg{ it, {} }; - GGML_ASSERT(split_arg.size() <= LLAMA_MAX_DEVICES); + GGML_ASSERT(split_arg.size() <= llama_max_devices()); - for (size_t i_device = 0; i_device < LLAMA_MAX_DEVICES; ++i_device) { + for (size_t i_device = 0; i_device < llama_max_devices(); ++i_device) { if (i_device < split_arg.size()) { params.tensor_split[i_device] = std::stof(split_arg[i_device]); } @@ -2563,10 +2116,12 @@ public: } grpc::Status PredictStream(grpc::ServerContext* context, const backend::PredictOptions* request, grpc::ServerWriter* writer) override { json data = parse_options(true, request, llama); - const int task_id = llama.request_completion(data, false, false, -1); + const int task_id = llama.queue_tasks.get_new_id(); + llama.queue_results.add_waiting_task_id(task_id); + llama.request_completion(task_id, data, false, false, -1); while (true) { - task_result result = llama.next_result(task_id); + task_result result = llama.queue_results.recv(task_id); if (!result.error) { const std::string str = "data: " + @@ -2599,9 +2154,11 @@ public: grpc::Status Predict(ServerContext* context, const backend::PredictOptions* request, backend::Reply* reply) { json data = parse_options(false, request, llama); - const int task_id = llama.request_completion(data, false, false, -1); + const int task_id = llama.queue_tasks.get_new_id(); + llama.queue_results.add_waiting_task_id(task_id); + llama.request_completion(task_id, data, false, false, -1); std::string completion_text; - task_result result = llama.next_result(task_id); + task_result result = llama.queue_results.recv(task_id); if (!result.error && result.stop) { completion_text = result.result_json.value("content", ""); reply->set_message(completion_text); diff --git a/backend/cpp/llama/utils.hpp b/backend/cpp/llama/utils.hpp new file mode 100644 index 00000000..c5dafbf0 --- /dev/null +++ 
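With the queue in place, both Predict and PredictStream follow the same protocol: reserve a task id, register as a waiter on the result queue, post the completion request, and block on recv until a result carrying that id arrives. A compact Go rendition of the waiting side (a channel stands in for the mutex/condition-variable pair, which is a simplification; names are illustrative):

```go
package main

import "fmt"

type taskResult struct {
	id      int
	content string
}

type resultQueue struct {
	results chan taskResult
}

// recv blocks until a result for taskID arrives. Results for other
// waiters are not routed here -- a simplification over the real queue,
// which scans a shared vector under a mutex.
func (q *resultQueue) recv(taskID int) taskResult {
	for r := range q.results {
		if r.id == taskID {
			return r
		}
	}
	return taskResult{}
}

func main() {
	q := &resultQueue{results: make(chan taskResult, 4)}
	taskID := 1 // reserved up front, as with queue_tasks.get_new_id()
	// a worker would normally produce this after processing the task
	q.results <- taskResult{id: taskID, content: "done"}
	fmt.Println(q.recv(taskID).content)
}
```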
b/backend/cpp/llama/utils.hpp @@ -0,0 +1,510 @@ +// https://github.com/ggerganov/llama.cpp/blob/master/examples/server/utils.hpp + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "json.hpp" + +#include "../llava/clip.h" + +using json = nlohmann::json; + +extern bool server_verbose; + +#ifndef SERVER_VERBOSE +#define SERVER_VERBOSE 1 +#endif + +#if SERVER_VERBOSE != 1 +#define LOG_VERBOSE(MSG, ...) +#else +#define LOG_VERBOSE(MSG, ...) \ + do \ + { \ + if (server_verbose) \ + { \ + server_log("VERBOSE", __func__, __LINE__, MSG, __VA_ARGS__); \ + } \ + } while (0) +#endif + +#define LOG_ERROR( MSG, ...) server_log("ERROR", __func__, __LINE__, MSG, __VA_ARGS__) +#define LOG_WARNING(MSG, ...) server_log("WARNING", __func__, __LINE__, MSG, __VA_ARGS__) +#define LOG_INFO( MSG, ...) server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__) + +// +// parallel +// + +enum server_state { + SERVER_STATE_LOADING_MODEL, // Server is starting up, model not fully loaded yet + SERVER_STATE_READY, // Server is ready and model is loaded + SERVER_STATE_ERROR // An error occurred, load_model failed +}; + +enum task_type { + TASK_TYPE_COMPLETION, + TASK_TYPE_CANCEL, + TASK_TYPE_NEXT_RESPONSE +}; + +struct task_server { + int id = -1; // to be filled by llama_server_queue + int target_id; + task_type type; + json data; + bool infill_mode = false; + bool embedding_mode = false; + int multitask_id = -1; +}; + +struct task_result { + int id; + int multitask_id = -1; + bool stop; + bool error; + json result_json; +}; + +struct task_multi { + int id; + std::set subtasks_remaining{}; + std::vector results{}; +}; + +// TODO: can become bool if we can't find use of more states +enum slot_state +{ + IDLE, + PROCESSING, +}; + +enum slot_command +{ + NONE, + LOAD_PROMPT, + RELEASE, +}; + +struct slot_params +{ + bool stream = true; + bool cache_prompt = false; // remember the prompt to avoid reprocessing all prompt + + uint32_t seed = -1; // RNG seed + int32_t n_keep = 0; // number of tokens to keep from initial prompt + int32_t n_predict = -1; // new tokens to predict + + std::vector antiprompt; + + json input_prefix; + json input_suffix; +}; + +struct slot_image +{ + int32_t id; + + bool request_encode_image = false; + float * image_embedding = nullptr; + int32_t image_tokens = 0; + + clip_image_u8 * img_data; + + std::string prefix_prompt; // before of this image +}; + +// completion token output with probabilities +struct completion_token_output +{ + struct token_prob + { + llama_token tok; + float prob; + }; + + std::vector probs; + llama_token tok; + std::string text_to_send; +}; + +static inline void server_log(const char *level, const char *function, int line, + const char *message, const nlohmann::ordered_json &extra) +{ + nlohmann::ordered_json log + { + {"timestamp", time(nullptr)}, + {"level", level}, + {"function", function}, + {"line", line}, + {"message", message}, + }; + + if (!extra.empty()) + { + log.merge_patch(extra); + } + + const std::string str = log.dump(-1, ' ', false, json::error_handler_t::replace); + printf("%.*s\n", (int)str.size(), str.data()); + fflush(stdout); +} + +// +// server utils +// + +template +static T json_value(const json &body, const std::string &key, const T &default_value) +{ + // Fallback null to default value + return body.contains(key) && !body.at(key).is_null() + ? 
body.value(key, default_value) + : default_value; +} + +inline std::string format_chatml(std::vector<json> messages) +{ + std::ostringstream chatml_msgs; + + for (auto it = messages.begin(); it != messages.end(); ++it) { + chatml_msgs << "<|im_start|>" + << json_value(*it, "role", std::string("user")) << '\n'; + chatml_msgs << json_value(*it, "content", std::string("")) + << "<|im_end|>\n"; + } + + chatml_msgs << "<|im_start|>assistant" << '\n'; + + return chatml_msgs.str(); +} + +// +// work queue utils +// + +struct llama_server_queue { + int id = 0; + std::mutex mutex_tasks; + // queues + std::vector<task_server> queue_tasks; + std::vector<task_server> queue_tasks_deferred; + std::vector<task_multi> queue_multitasks; + std::condition_variable condition_tasks; + // callback functions + std::function<void(task_server&)> callback_new_task; + std::function<void(task_multi&)> callback_finish_multitask; + std::function<void(void)> callback_all_task_finished; + + // Add a new task to the end of the queue + int post(task_server task) { + std::unique_lock<std::mutex> lock(mutex_tasks); + if (task.id == -1) { + task.id = id++; + } + queue_tasks.push_back(std::move(task)); + condition_tasks.notify_one(); + return task.id; + } + + // Add a new task, but defer until one slot is available + void defer(task_server task) { + std::unique_lock<std::mutex> lock(mutex_tasks); + queue_tasks_deferred.push_back(std::move(task)); + } + + // Get the next id for creating a new task + int get_new_id() { + std::unique_lock<std::mutex> lock(mutex_tasks); + return id++; + } + + // Register function to process a new task + void on_new_task(std::function<void(task_server&)> callback) { + callback_new_task = callback; + } + + // Register function to process a multitask + void on_finish_multitask(std::function<void(task_multi&)> callback) { + callback_finish_multitask = callback; + } + + // Register the function to be called when the batch of tasks is finished + void on_all_tasks_finished(std::function<void(void)> callback) { + callback_all_task_finished = callback; + } + + // Call when the state of one slot is changed + void notify_slot_changed() { + // move deferred tasks back to main loop + std::unique_lock<std::mutex> lock(mutex_tasks); + for (auto & task : queue_tasks_deferred) { + queue_tasks.push_back(std::move(task)); + } + queue_tasks_deferred.clear(); + } + 
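A task that finds no free slot is parked in queue_tasks_deferred and moved back onto the main queue when notify_slot_changed fires. The retry semantics in a few lines of Go (a minimal sketch under the same single-lock assumption; names are illustrative):

```go
package main

import (
	"fmt"
	"sync"
)

type miniQueue struct {
	mu       sync.Mutex
	tasks    []int
	deferred []int
}

// deferTask parks a task that found no free slot.
func (q *miniQueue) deferTask(id int) {
	q.mu.Lock()
	defer q.mu.Unlock()
	q.deferred = append(q.deferred, id)
}

// notifySlotChanged moves every deferred task back onto the main
// queue so the loop will retry it now that a slot may be free.
func (q *miniQueue) notifySlotChanged() {
	q.mu.Lock()
	defer q.mu.Unlock()
	q.tasks = append(q.tasks, q.deferred...)
	q.deferred = nil
}

func main() {
	q := &miniQueue{}
	q.deferTask(7)
	q.notifySlotChanged()
	fmt.Println(q.tasks) // [7]
}
```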
+ // Start the main loop. This call is blocking + [[noreturn]] + void start_loop() { + while (true) { + // new task arrived + LOG_VERBOSE("have new task", {}); + { + while (true) + { + std::unique_lock<std::mutex> lock(mutex_tasks); + if (queue_tasks.empty()) { + lock.unlock(); + break; + } + task_server task = queue_tasks.front(); + queue_tasks.erase(queue_tasks.begin()); + lock.unlock(); + LOG_VERBOSE("callback_new_task", {}); + callback_new_task(task); + } + LOG_VERBOSE("callback_all_task_finished", {}); + // process and update all the multitasks + auto queue_iterator = queue_multitasks.begin(); + while (queue_iterator != queue_multitasks.end()) + { + if (queue_iterator->subtasks_remaining.empty()) + { + // all subtasks done == multitask is done + task_multi current_multitask = *queue_iterator; + callback_finish_multitask(current_multitask); + // remove this multitask + queue_iterator = queue_multitasks.erase(queue_iterator); + } + else + { + ++queue_iterator; + } + } + // all tasks in the current loop are finished + callback_all_task_finished(); + } + LOG_VERBOSE("wait for new task", {}); + // wait for new task + { + std::unique_lock<std::mutex> lock(mutex_tasks); + if (queue_tasks.empty()) { + condition_tasks.wait(lock, [&]{ + return !queue_tasks.empty(); + }); + } + } + } + } + + // + // functions to manage multitasks + // + + // add a multitask by specifying the ids of all its subtasks (each subtask is a task_server) + void add_multitask(int multitask_id, std::vector<int>& sub_ids) + { + std::lock_guard<std::mutex> lock(mutex_tasks); + task_multi multi; + multi.id = multitask_id; + std::copy(sub_ids.begin(), sub_ids.end(), std::inserter(multi.subtasks_remaining, multi.subtasks_remaining.end())); + queue_multitasks.push_back(multi); + } + + // update the remaining subtasks, while appending results to the multitask + void update_multitask(int multitask_id, int subtask_id, task_result& result) + { + std::lock_guard<std::mutex> lock(mutex_tasks); + for (auto& multitask : queue_multitasks) + { + if (multitask.id == multitask_id) + { + multitask.subtasks_remaining.erase(subtask_id); + multitask.results.push_back(result); + } + } + } +}; + +struct llama_server_response { + typedef std::function<void(int, int, task_result&)> callback_multitask_t; + callback_multitask_t callback_update_multitask; + // for keeping track of all tasks waiting for the result + std::set<int> waiting_task_ids; + // the main result queue + std::vector<task_result> queue_results; + std::mutex mutex_results; + std::condition_variable condition_results; + + void add_waiting_task_id(int task_id) { + std::unique_lock<std::mutex> lock(mutex_results); + waiting_task_ids.insert(task_id); + } + + void remove_waiting_task_id(int task_id) { + std::unique_lock<std::mutex> lock(mutex_results); + waiting_task_ids.erase(task_id); + } + + // This function blocks the thread until there is a response for this task_id + task_result recv(int task_id) { + while (true) + { + std::unique_lock<std::mutex> lock(mutex_results); + condition_results.wait(lock, [&]{ + return !queue_results.empty(); + }); + LOG_VERBOSE("condition_results unblock", {}); + + for (int i = 0; i < (int) queue_results.size(); i++) + { + if (queue_results[i].id == task_id) + { + assert(queue_results[i].multitask_id == -1); + task_result res = queue_results[i]; + queue_results.erase(queue_results.begin() + i); + return res; + } + } + } + + // should never reach here + } + + // Register the function to update multitask + void on_multitask_update(callback_multitask_t callback) { + callback_update_multitask = callback; + } + + // Send a new result to a waiting task_id + void send(task_result result) { + std::unique_lock<std::mutex> lock(mutex_results); + 
LOG_VERBOSE("send new result", {}); + for (auto& task_id : waiting_task_ids) { + // LOG_TEE("waiting task id %i \n", task_id); + // for now, tasks that have associated parent multitasks just get erased once multitask picks up the result + if (result.multitask_id == task_id) + { + LOG_VERBOSE("callback_update_multitask", {}); + callback_update_multitask(task_id, result.id, result); + continue; + } + + if (result.id == task_id) + { + LOG_VERBOSE("queue_results.push_back", {}); + queue_results.push_back(result); + condition_results.notify_one(); + return; + } + } + } +}; + +// +// base64 utils (TODO: move to common in the future) +// + +static const std::string base64_chars = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +static inline bool is_base64(uint8_t c) +{ + return (isalnum(c) || (c == '+') || (c == '/')); +} + +static inline std::vector<uint8_t> base64_decode(const std::string & encoded_string) +{ + int i = 0; + int j = 0; + int in_ = 0; + + int in_len = encoded_string.size(); + + uint8_t char_array_4[4]; + uint8_t char_array_3[3]; + + std::vector<uint8_t> ret; + + while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_])) + { + char_array_4[i++] = encoded_string[in_]; in_++; + if (i == 4) + { + for (i = 0; i <4; i++) + { + char_array_4[i] = base64_chars.find(char_array_4[i]); + } + + char_array_3[0] = ((char_array_4[0] ) << 2) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; + + for (i = 0; (i < 3); i++) + { + ret.push_back(char_array_3[i]); + } + i = 0; + } + } + + if (i) + { + for (j = i; j <4; j++) + { + char_array_4[j] = 0; + } + + for (j = 0; j <4; j++) + { + char_array_4[j] = base64_chars.find(char_array_4[j]); + } + + char_array_3[0] = ((char_array_4[0] ) << 2) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; + + for (j = 0; (j < i - 1); j++) + { + ret.push_back(char_array_3[j]); + } + } + + return ret; +} + +// +// random string / id +// + +static std::string random_string() +{ + static const std::string str("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); + + std::random_device rd; + std::mt19937 generator(rd()); + + std::string result(32, ' '); + + for (int i = 0; i < 32; ++i) { + result[i] = str[generator() % str.size()]; + } + + return result; +} + +static std::string gen_chatcmplid() +{ + std::stringstream chatcmplid; + chatcmplid << "chatcmpl-" << random_string(); + return chatcmplid.str(); +} \ No newline at end of file diff --git a/docs/content/docs/features/GPU-acceleration.md b/docs/content/docs/features/GPU-acceleration.md index cef08e71..9688e787 100644 --- a/docs/content/docs/features/GPU-acceleration.md +++ b/docs/content/docs/features/GPU-acceleration.md @@ -15,9 +15,45 @@ This section contains instructions on how to use LocalAI with GPU acceleration. For acceleration for AMD or Metal HW there are no specific container images, see the [build]({{%relref "docs/getting-started/build#Acceleration" %}}) {{% /alert %}} -### CUDA(NVIDIA) acceleration -#### Requirements +## Model configuration + +Depending on the model architecture and backend used, there might be different ways to enable GPU acceleration. It is required to configure the model you intend to use with a YAML config file.
For example, for `llama.cpp` workloads a configuration file might look like this (where `gpu_layers` is the number of layers to offload to the GPU): + +```yaml +name: my-model-name +# Default model parameters +parameters: + # Relative to the models path + model: llama.cpp-model.ggmlv3.q5_K_M.bin + +context_size: 1024 +threads: 1 + +f16: true # enable with GPU acceleration +gpu_layers: 22 # GPU Layers (only used when built with cublas) + +``` + +For diffusers, it might look like this instead: + +```yaml +name: stablediffusion +parameters: + model: toonyou_beta6.safetensors +backend: diffusers +step: 30 +f16: true +diffusers: + pipeline_type: StableDiffusionPipeline + cuda: true + enable_parameters: "negative_prompt,num_inference_steps,clip_skip" + scheduler_type: "k_dpmpp_sde" +``` + +## CUDA(NVIDIA) acceleration + +### Requirements Requirement: nvidia-container-toolkit (installation instructions [1](https://www.server-world.info/en/note?os=Ubuntu_22.04&p=nvidia&f=2) [2](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)) @@ -74,37 +110,21 @@ llama_model_load_internal: total VRAM used: 1598 MB llama_init_from_file: kv self size = 512.00 MB ``` -#### Model configuration +## Intel acceleration (sycl) -Depending on the model architecture and backend used, there might be different ways to enable GPU acceleration. It is required to configure the model you intend to use with a YAML config file. For example, for `llama.cpp` workloads a configuration file might look like this (where `gpu_layers` is the number of layers to offload to the GPU): +### Requirements -```yaml -name: my-model-name -# Default model parameters -parameters: - # Relative to the models path - model: llama.cpp-model.ggmlv3.q5_K_M.bin +Requirement: [Intel oneAPI Base Toolkit](https://software.intel.com/content/www/us/en/develop/tools/oneapi/base-toolkit/download.html) -context_size: 1024 -threads: 1 +To use SYCL, use the images with the `sycl-f16` or `sycl-f32` tag, for example `{{< version >}}-sycl-f32-core`, `{{< version >}}-sycl-f16-ffmpeg-core`, ... -f16: true # enable with GPU acceleration -gpu_layers: 22 # GPU Layers (only used when built with cublas) +The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags). +### Notes + +In addition to the commands to run LocalAI normally, you need to specify `--device /dev/dri` to docker, for example: + +```bash +docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg-core ``` -For diffusers instead, it might look like this instead: - -```yaml -name: stablediffusion -parameters: - model: toonyou_beta6.safetensors -backend: diffusers -step: 30 -f16: true -diffusers: - pipeline_type: StableDiffusionPipeline - cuda: true - enable_parameters: "negative_prompt,num_inference_steps,clip_skip" - scheduler_type: "k_dpmpp_sde" -``` \ No newline at end of file diff --git a/docs/content/docs/getting-started/build.md b/docs/content/docs/getting-started/build.md index c2a7e0ee..b26a16d7 100644 --- a/docs/content/docs/getting-started/build.md +++ b/docs/content/docs/getting-started/build.md @@ -83,7 +83,7 @@ Here is the list of the variables available that can be used to customize the bu | Variable | Default | Description | | ---------------------| ------- | ----------- | -| `BUILD_TYPE` | None | Build type. Available: `cublas`, `openblas`, `clblas`, `metal`,`hipblas` | +| `BUILD_TYPE` | None | Build type.
Available: `cublas`, `openblas`, `clblas`, `metal`, `hipblas`, `sycl_f16`, `sycl_f32` | | `GO_TAGS` | `tts stablediffusion` | Go tags. Available: `stablediffusion`, `tts`, `tinydream` | | `CLBLAST_DIR` | | Specify a CLBlast directory | | `CUDA_LIBPATH` | | Specify a CUDA library path | @@ -225,6 +225,17 @@ make BUILD_TYPE=clblas build To specify a clblast dir set: `CLBLAST_DIR` +#### Intel GPU acceleration + +Intel GPU acceleration is supported via SYCL. + +Requirements: [Intel oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html) (see also [llama.cpp setup installation instructions](https://github.com/ggerganov/llama.cpp/blob/d71ac90985854b0905e1abba778e407e17f9f887/README-sycl.md?plain=1#L56)) + +``` +make BUILD_TYPE=sycl_f16 build # for float16 +make BUILD_TYPE=sycl_f32 build # for float32 +``` + #### Metal (Apple Silicon) ``` diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md index 96577a84..0792de51 100644 --- a/docs/content/docs/overview.md +++ b/docs/content/docs/overview.md @@ -74,14 +74,6 @@ Note that this started just as a fun weekend project by [mudler](https://github. - 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/) - 🆕 [Vision API](https://localai.io/features/gpt-vision/) -## How does it work? - -LocalAI is an API written in Go that serves as an OpenAI shim, enabling software already developed with OpenAI SDKs to seamlessly integrate with LocalAI. It can be effortlessly implemented as a substitute, even on consumer-grade hardware. This capability is achieved by employing various C++ backends, including [ggml](https://github.com/ggerganov/ggml), to perform inference on LLMs using both CPU and, if desired, GPU. Internally LocalAI backends are just gRPC server, indeed you can specify and build your own gRPC server and extend LocalAI in runtime as well. It is possible to specify external gRPC server and/or binaries that LocalAI will manage internally. - -LocalAI uses a mixture of backends written in various languages (C++, Golang, Python, ...). You can check [the model compatibility table]({{%relref "docs/reference/compatibility-table" %}}) to learn about all the components of LocalAI. - -![localai](https://github.com/go-skynet/localai-website/assets/2420543/6492e685-8282-4217-9daa-e229a31548bc) - ## Contribute and help To help the project you can: @@ -114,20 +106,6 @@ LocalAI couldn't have been built without the help of great software already avai - https://github.com/rhasspy/piper - https://github.com/cmp-nct/ggllm.cpp - - -## Backstory - -As much as typical open source projects starts, I, [mudler](https://github.com/mudler/), was fiddling around with [llama.cpp](https://github.com/ggerganov/llama.cpp) over my long nights and wanted to have a way to call it from `go`, as I am a Golang developer and use it extensively. So I've created `LocalAI` (or what was initially known as `llama-cli`) and added an API to it. - -But guess what? The more I dived into this rabbit hole, the more I realized that I had stumbled upon something big. With all the fantastic C++ projects floating around the community, it dawned on me that I could piece them together to create a full-fledged OpenAI replacement. So, ta-da! LocalAI was born, and it quickly overshadowed its humble origins. - -Now, why did I choose to go with C++ bindings, you ask?
Well, I wanted to keep LocalAI snappy and lightweight, allowing it to run like a champ on any system and avoid any Golang penalties of the GC, and, most importantly built on shoulders of giants like `llama.cpp`. Go is good at backends and API and is easy to maintain. And hey, don't forget that I'm all about sharing the love. That's why I made LocalAI MIT licensed, so everyone can hop on board and benefit from it. - -As if that wasn't exciting enough, as the project gained traction, [mkellerman](https://github.com/mkellerman) and [Aisuko](https://github.com/Aisuko) jumped in to lend a hand. mkellerman helped set up some killer examples, while Aisuko is becoming our community maestro. The community now is growing even more with new contributors and users, and I couldn't be happier about it! - -Oh, and let's not forget the real MVP here—[llama.cpp](https://github.com/ggerganov/llama.cpp). Without this extraordinary piece of software, LocalAI wouldn't even exist. So, a big shoutout to the community for making this magic happen! - ## 🤗 Contributors This is a community project, a special thanks to our contributors! 🤗 diff --git a/docs/content/docs/reference/architecture.md b/docs/content/docs/reference/architecture.md new file mode 100644 index 00000000..23abe111 --- /dev/null +++ b/docs/content/docs/reference/architecture.md @@ -0,0 +1,25 @@ + ++++ +disableToc = false +title = "Architecture" +weight = 25 ++++ + +LocalAI is an API written in Go that serves as an OpenAI shim, enabling software already developed with OpenAI SDKs to seamlessly integrate with LocalAI. It can be effortlessly implemented as a substitute, even on consumer-grade hardware. This capability is achieved by employing various C++ backends, including [ggml](https://github.com/ggerganov/ggml), to perform inference on LLMs using both CPU and, if desired, GPU. Internally, LocalAI backends are just gRPC servers; indeed, you can specify and build your own gRPC server and extend LocalAI at runtime as well. It is possible to specify external gRPC servers and/or binaries that LocalAI will manage internally. + +LocalAI uses a mixture of backends written in various languages (C++, Golang, Python, ...). You can check [the model compatibility table]({{%relref "docs/reference/compatibility-table" %}}) to learn about all the components of LocalAI. + +![localai](https://github.com/go-skynet/localai-website/assets/2420543/6492e685-8282-4217-9daa-e229a31548bc) + + +## Backstory + +As with most typical open source projects, I, [mudler](https://github.com/mudler/), was fiddling around with [llama.cpp](https://github.com/ggerganov/llama.cpp) over my long nights and wanted to have a way to call it from `go`, as I am a Golang developer and use it extensively. So I've created `LocalAI` (or what was initially known as `llama-cli`) and added an API to it. + +But guess what? The more I dived into this rabbit hole, the more I realized that I had stumbled upon something big. With all the fantastic C++ projects floating around the community, it dawned on me that I could piece them together to create a full-fledged OpenAI replacement. So, ta-da! LocalAI was born, and it quickly overshadowed its humble origins. + +Now, why did I choose to go with C++ bindings, you ask? Well, I wanted to keep LocalAI snappy and lightweight, allowing it to run like a champ on any system, avoid any Golang GC penalties, and, most importantly, build on the shoulders of giants like `llama.cpp`. Go is good at backends and APIs and is easy to maintain.
And hey, don't forget that I'm all about sharing the love. That's why I made LocalAI MIT licensed, so everyone can hop on board and benefit from it. + +As if that wasn't exciting enough, as the project gained traction, [mkellerman](https://github.com/mkellerman) and [Aisuko](https://github.com/Aisuko) jumped in to lend a hand. mkellerman helped set up some killer examples, while Aisuko is becoming our community maestro. The community is now growing even more with new contributors and users, and I couldn't be happier about it! + +Oh, and let's not forget the real MVP here—[llama.cpp](https://github.com/ggerganov/llama.cpp). Without this extraordinary piece of software, LocalAI wouldn't even exist. So, a big shoutout to the community for making this magic happen! diff --git a/entrypoint.sh b/entrypoint.sh index 05f67128..ae1976af 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -13,6 +13,10 @@ if [ -n "$EXTRA_BACKENDS" ]; then done fi +if [ -e "/opt/intel/oneapi/setvars.sh" ]; then + source /opt/intel/oneapi/setvars.sh +fi + if [ "$REBUILD" != "false" ]; then rm -rf ./local-ai make build -j${BUILD_PARALLELISM:-1} From e46db63e06e40072dfa3946d382003633f799e47 Mon Sep 17 00:00:00 2001 From: Richard Palethorpe Date: Fri, 2 Feb 2024 17:17:44 +0000 Subject: [PATCH 0028/2895] feat(mamba): Add bagel-dpo-2.8b (#1671) Adds the Mamba-slimpj model fine-tuned with bagel. https://huggingface.co/jondurbin/bagel-dpo-2.8b-v0.2 Signed-off-by: Richard Palethorpe --- embedded/models/mamba-bagel.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 embedded/models/mamba-bagel.yaml diff --git a/embedded/models/mamba-bagel.yaml b/embedded/models/mamba-bagel.yaml new file mode 100644 index 00000000..aed7142f --- /dev/null +++ b/embedded/models/mamba-bagel.yaml @@ -0,0 +1,21 @@ +name: bagel +backend: mamba +parameters: + model: "jondurbin/bagel-dpo-2.8b-v0.2" +systemPrompt: "You are a helpful, unbiased, uncensored assistant." +template: + chat_message: | + {{if eq .RoleName "assistant"}}{{.Content}}{{else}} + [INST] + {{if .SystemPrompt}}{{.SystemPrompt}}{{else if eq .RoleName "system"}}<<SYS>>{{.Content}}<</SYS>> + + {{else if .Content}}{{.Content}}{{end}} + [/INST] + {{end}} + completion: | + {{.Input}} +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "bagel", + "messages": [{"role": "user", "content": "how are you doing"}] + }' From 3c24a70a1b1eb6d11c89ff04d9d617e7df31f6df Mon Sep 17 00:00:00 2001 From: Ivan Smirnov <46977173+Wansmer@users.noreply.github.com> Date: Fri, 2 Feb 2024 20:18:03 +0300 Subject: [PATCH 0029/2895] fix (docs): fixed broken links `github/` -> `github.com/` (#1672) fix broken links --- docs/content/docs/advanced/fine-tuning.md | 2 +- examples/e2e-fine-tuning/README.md | 2 +- examples/e2e-fine-tuning/notebook.ipynb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/content/docs/advanced/fine-tuning.md b/docs/content/docs/advanced/fine-tuning.md index 550cb025..0680a279 100644 --- a/docs/content/docs/advanced/fine-tuning.md +++ b/docs/content/docs/advanced/fine-tuning.md @@ -23,7 +23,7 @@ Fine-tuning a language model is a process that requires a lot of computational p Currently LocalAI doesn't support the fine-tuning endpoint as LocalAI but there are [plans](https://github.com/mudler/LocalAI/issues/596) to support that. For the time being a guide is proposed here to give a simple starting point on how to fine-tune a model and use it with LocalAI (but also with llama.cpp).
-There is an e2e example of fine-tuning a LLM model to use with [LocalAI](https://github/mudler/LocalAI) written by [@mudler](https://github.com/mudler) available [here](https://github.com/mudler/LocalAI/tree/master/examples/e2e-fine-tuning/). +There is an e2e example of fine-tuning an LLM model to use with [LocalAI](https://github.com/mudler/LocalAI) written by [@mudler](https://github.com/mudler) available [here](https://github.com/mudler/LocalAI/tree/master/examples/e2e-fine-tuning/). The steps involved are: diff --git a/examples/e2e-fine-tuning/README.md b/examples/e2e-fine-tuning/README.md index 2674b5af..af3ab8a3 100644 --- a/examples/e2e-fine-tuning/README.md +++ b/examples/e2e-fine-tuning/README.md @@ -1,4 +1,4 @@ -This is an example of fine-tuning a LLM model to use with [LocalAI](https://github/mudler/LocalAI) written by [@mudler](https://github.com/mudler). +This is an example of fine-tuning an LLM model to use with [LocalAI](https://github.com/mudler/LocalAI) written by [@mudler](https://github.com/mudler). Specifically, this example shows how to use [axolotl](https://github.com/OpenAccess-AI-Collective/axolotl) to fine-tune an LLM model to consume with LocalAI as a `gguf` model. diff --git a/examples/e2e-fine-tuning/notebook.ipynb b/examples/e2e-fine-tuning/notebook.ipynb index 9efb57d2..4996da5d 100644 --- a/examples/e2e-fine-tuning/notebook.ipynb +++ b/examples/e2e-fine-tuning/notebook.ipynb @@ -6,7 +6,7 @@ "source": [ "## Finetuning a model and using it with LocalAI\n", "\n", - "This is an example of fine-tuning a LLM model to use with [LocalAI](https://github/mudler/LocalAI) written by [@mudler](https://github.com/mudler).\n", + "This is an example of fine-tuning an LLM model to use with [LocalAI](https://github.com/mudler/LocalAI) written by [@mudler](https://github.com/mudler).\n", "\n", "Specifically, this example shows how to use [axolotl](https://github.com/OpenAccess-AI-Collective/axolotl) to fine-tune an LLM model to consume with LocalAI as a `gguf` model."
] From c87ca4f320e921dcada51d4042869360cfa4fc3c Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 2 Feb 2024 19:14:03 +0100 Subject: [PATCH 0030/2895] :arrow_up: Update ggerganov/llama.cpp (#1669) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ae0babaa..9297a161 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915 +CPPLLAMA_VERSION?=128dcbd3c9c4b12f42b560a4430427d7b2828628 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 7678b25755ba76ca6134473c209b117216462669 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 2 Feb 2024 22:46:26 +0100 Subject: [PATCH 0031/2895] :arrow_up: Update ggerganov/llama.cpp (#1673) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9297a161..a95abd48 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=128dcbd3c9c4b12f42b560a4430427d7b2828628 +CPPLLAMA_VERSION?=191221178f51b6e81122c5bda0fd79620e547d07 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From df13ba655c7bd5cf22b4d41454cd55650042e964 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 3 Feb 2024 13:01:13 +0100 Subject: [PATCH 0032/2895] Drop old falcon backend (deprecated) (#1675) Drop old falcon backend --- Makefile | 6 +-- README.md | 1 - backend/go/llm/falcon-ggml/main.go | 23 ---------- backend/go/llm/transformers/falcon.go | 43 ------------------- docs/content/docs/overview.md | 1 - .../docs/reference/compatibility-table.md | 3 +- pkg/model/initializers.go | 2 - 7 files changed, 2 insertions(+), 77 deletions(-) delete mode 100644 backend/go/llm/falcon-ggml/main.go delete mode 100644 backend/go/llm/transformers/falcon.go diff --git a/Makefile b/Makefile index a95abd48..5d0a51ca 100644 --- a/Makefile +++ b/Makefile @@ -145,7 +145,7 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts) OPTIONAL_GRPC+=backend-assets/grpc/piper endif -ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-ggml backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC) +ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-ggml backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC) GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC) # If empty, then we build all @@ -529,10 +529,6 @@ backend-assets/grpc/replit: backend-assets/grpc 
sources/go-ggml-transformers/lib CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/replit ./backend/go/llm/replit/ -backend-assets/grpc/falcon-ggml: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon-ggml ./backend/go/llm/falcon-ggml/ - backend-assets/grpc/starcoder: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/starcoder ./backend/go/llm/starcoder/ diff --git a/README.md b/README.md index dbed541d..45512a45 100644 --- a/README.md +++ b/README.md @@ -176,7 +176,6 @@ LocalAI couldn't have been built without the help of great software already avai - https://github.com/ggerganov/whisper.cpp - https://github.com/saharNooby/rwkv.cpp - https://github.com/rhasspy/piper -- https://github.com/cmp-nct/ggllm.cpp ## 🤗 Contributors diff --git a/backend/go/llm/falcon-ggml/main.go b/backend/go/llm/falcon-ggml/main.go deleted file mode 100644 index 426ae823..00000000 --- a/backend/go/llm/falcon-ggml/main.go +++ /dev/null @@ -1,23 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers" - - grpc "github.com/go-skynet/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &transformers.Falcon{}); err != nil { - panic(err) - } -} diff --git a/backend/go/llm/transformers/falcon.go b/backend/go/llm/transformers/falcon.go deleted file mode 100644 index 5299fb02..00000000 --- a/backend/go/llm/transformers/falcon.go +++ /dev/null @@ -1,43 +0,0 @@ -package transformers - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - transformers "github.com/go-skynet/go-ggml-transformers.cpp" -) - -type Falcon struct { - base.SingleThread - - falcon *transformers.Falcon -} - -func (llm *Falcon) Load(opts *pb.ModelOptions) error { - model, err := transformers.NewFalcon(opts.ModelFile) - llm.falcon = model - return err -} - -func (llm *Falcon) Predict(opts *pb.PredictOptions) (string, error) { - return llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -// fallback to Predict -func (llm *Falcon) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - res, err := llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...) 
- - if err != nil { - fmt.Println("err: ", err) - } - results <- res - close(results) - }() - - return nil -} diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md index 0792de51..c1232969 100644 --- a/docs/content/docs/overview.md +++ b/docs/content/docs/overview.md @@ -104,7 +104,6 @@ LocalAI couldn't have been built without the help of great software already avai - https://github.com/ggerganov/whisper.cpp - https://github.com/saharNooby/rwkv.cpp - https://github.com/rhasspy/piper -- https://github.com/cmp-nct/ggllm.cpp ## 🤗 Contributors diff --git a/docs/content/docs/reference/compatibility-table.md b/docs/content/docs/reference/compatibility-table.md index 3f46dadd..98446e1d 100644 --- a/docs/content/docs/reference/compatibility-table.md +++ b/docs/content/docs/reference/compatibility-table.md @@ -16,7 +16,7 @@ LocalAI will attempt to automatically load models which are not explicitly confi | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration | |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------| -| [llama.cpp]({{%relref "docs/features/text-generation#llama.cpp" %}}) | Vicuna, Alpaca, LLaMa | yes | GPT and Functions | yes** | yes | CUDA, openCL, cuBLAS, Metal | +| [llama.cpp]({{%relref "docs/features/text-generation#llama.cpp" %}}) | Vicuna, Alpaca, LLaMa, Falcon, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes** | yes | CUDA, openCL, cuBLAS, Metal | | [gpt4all-llama](https://github.com/nomic-ai/gpt4all) | Vicuna, Alpaca, LLaMa | yes | GPT | no | yes | N/A | | [gpt4all-mpt](https://github.com/nomic-ai/gpt4all) | MPT | yes | GPT | no | yes | N/A | | [gpt4all-j](https://github.com/nomic-ai/gpt4all) | GPT4ALL-J | yes | GPT | no | yes | N/A | @@ -35,7 +35,6 @@ LocalAI will attempt to automatically load models which are not explicitly confi | [stablediffusion](https://github.com/EdVince/Stable-Diffusion-NCNN) ([binding](https://github.com/mudler/go-stable-diffusion)) | stablediffusion | no | Image | no | no | N/A | | [langchain-huggingface](https://github.com/tmc/langchaingo) | Any text generators available on HuggingFace through API | yes | GPT | no | no | N/A | | [piper](https://github.com/rhasspy/piper) ([binding](https://github.com/mudler/go-piper)) | Any piper onnx model | no | Text to voice | no | no | N/A | -| [falcon](https://github.com/cmp-nct/ggllm.cpp/tree/c12b2d65f732a0d8846db2244e070f0f3e73505c) ([binding](https://github.com/mudler/go-ggllm.cpp)) | Falcon *** | yes | GPT | no | yes | CUDA | | [sentencetransformers](https://github.com/UKPLab/sentence-transformers) | BERT | no | Embeddings only | yes | no | N/A | | `bark` | bark | no | Audio generation | no | no | yes | | `autogptq` | GPTQ | yes | GPT | yes | no | N/A | diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index e293669a..8c5abdcc 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -34,7 +34,6 @@ const ( Gpt4AllMptBackend = "gpt4all-mpt" Gpt4AllJBackend = "gpt4all-j" Gpt4All = "gpt4all" - FalconGGMLBackend = "falcon-ggml" BertEmbeddingsBackend = "bert-embeddings" RwkvBackend = "rwkv" @@ -55,7 +54,6 @@ var AutoLoadBackends []string = []string{ Gpt4All, GPTNeoXBackend, BertEmbeddingsBackend, - FalconGGMLBackend, 
GPTJBackend, Gpt2Backend, DollyBackend, From f083a901fed0e667ecd5ccdb61f04ccae6788986 Mon Sep 17 00:00:00 2001 From: Nicolas Vermande Date: Sat, 3 Feb 2024 23:54:49 +0000 Subject: [PATCH 0033/2895] Fix HTTP links in README.md (#1677) Signed-off-by: Nicolas Vermande --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 45512a45..2d694cc4 100644 --- a/README.md +++ b/README.md @@ -109,10 +109,10 @@ Other: ### 🔗 Resources -- 🆕 New! [LLM finetuning guide](https://localai.io/advanced/fine-tuning/) +- 🆕 New! [LLM finetuning guide](https://localai.io/docs/advanced/fine-tuning/) - [How to build locally](https://localai.io/basics/build/index.html) - [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes) -- [Projects integrating LocalAI](https://localai.io/integrations/) +- [Projects integrating LocalAI](https://localai.io/docs/integrations/) - [How tos section](https://io.midori-ai.xyz/howtos/) (curated by our community) ## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social) From 38e4ec0b2a00c94bdffe74a8eabb6356aca795be Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 4 Feb 2024 00:55:12 +0100 Subject: [PATCH 0034/2895] :arrow_up: Update ggerganov/llama.cpp (#1678) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5d0a51ca..6f04a583 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=191221178f51b6e81122c5bda0fd79620e547d07 +CPPLLAMA_VERSION?=3c0d25c4756742ebf15ad44700fabc0700c638bd # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 98ad93d53e2b91a36a551e8251e17709c9d4593a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 4 Feb 2024 13:15:51 +0100 Subject: [PATCH 0035/2895] Drop ggml-based gpt2 and starcoder (supported by llama.cpp) (#1679) * Drop ggml-based gpt2 and starcoder (supported by llama.cpp) * Update compatibility table --- Makefile | 10 +------- backend/go/llm/gpt2/main.go | 23 ------------------- backend/go/llm/starcoder/main.go | 23 ------------------- .../docs/reference/compatibility-table.md | 5 ++-- pkg/model/initializers.go | 4 ---- 5 files changed, 3 insertions(+), 62 deletions(-) delete mode 100644 backend/go/llm/gpt2/main.go delete mode 100644 backend/go/llm/starcoder/main.go diff --git a/Makefile b/Makefile index 6f04a583..03ea7d51 100644 --- a/Makefile +++ b/Makefile @@ -145,7 +145,7 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts) OPTIONAL_GRPC+=backend-assets/grpc/piper endif -ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-ggml backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC) +ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-ggml backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gptj 
backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC) GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC) # If empty, then we build all @@ -509,10 +509,6 @@ backend-assets/grpc/dolly: backend-assets/grpc sources/go-ggml-transformers/libt CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/dolly ./backend/go/llm/dolly/ -backend-assets/grpc/gpt2: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt2 ./backend/go/llm/gpt2/ - backend-assets/grpc/gptj: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptj ./backend/go/llm/gptj/ @@ -529,10 +525,6 @@ backend-assets/grpc/replit: backend-assets/grpc sources/go-ggml-transformers/lib CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/replit ./backend/go/llm/replit/ -backend-assets/grpc/starcoder: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/starcoder ./backend/go/llm/starcoder/ - backend-assets/grpc/rwkv: backend-assets/grpc sources/go-rwkv/librwkv.a CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv diff --git a/backend/go/llm/gpt2/main.go b/backend/go/llm/gpt2/main.go deleted file mode 100644 index 972e39d3..00000000 --- a/backend/go/llm/gpt2/main.go +++ /dev/null @@ -1,23 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers" - - grpc "github.com/go-skynet/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &transformers.GPT2{}); err != nil { - panic(err) - } -} diff --git a/backend/go/llm/starcoder/main.go b/backend/go/llm/starcoder/main.go deleted file mode 100644 index 38534d6c..00000000 --- a/backend/go/llm/starcoder/main.go +++ /dev/null @@ -1,23 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers" - - grpc "github.com/go-skynet/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, 
&transformers.Starcoder{}); err != nil { - panic(err) - } -} diff --git a/docs/content/docs/reference/compatibility-table.md b/docs/content/docs/reference/compatibility-table.md index 98446e1d..7c3fe154 100644 --- a/docs/content/docs/reference/compatibility-table.md +++ b/docs/content/docs/reference/compatibility-table.md @@ -16,18 +16,16 @@ LocalAI will attempt to automatically load models which are not explicitly confi | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration | |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------| -| [llama.cpp]({{%relref "docs/features/text-generation#llama.cpp" %}}) | Vicuna, Alpaca, LLaMa, Falcon, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes** | yes | CUDA, openCL, cuBLAS, Metal | +| [llama.cpp]({{%relref "docs/features/text-generation#llama.cpp" %}}) | Vicuna, Alpaca, LLaMa, Falcon, Starcoder, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes** | yes | CUDA, openCL, cuBLAS, Metal | | [gpt4all-llama](https://github.com/nomic-ai/gpt4all) | Vicuna, Alpaca, LLaMa | yes | GPT | no | yes | N/A | | [gpt4all-mpt](https://github.com/nomic-ai/gpt4all) | MPT | yes | GPT | no | yes | N/A | | [gpt4all-j](https://github.com/nomic-ai/gpt4all) | GPT4ALL-J | yes | GPT | no | yes | N/A | | [falcon-ggml](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Falcon (*) | yes | GPT | no | no | N/A | -| [gpt2](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPT2, Cerebras | yes | GPT | no | no | N/A | | [dolly](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Dolly | yes | GPT | no | no | N/A | | [gptj](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPTJ | yes | GPT | no | no | N/A | | [mpt](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | MPT | yes | GPT | no | no | N/A | | [replit](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Replit | yes | GPT | no | no | N/A | | [gptneox](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPT NeoX, RedPajama, StableLM | yes | GPT | no | no | N/A | -| [starcoder](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Starcoder | yes | GPT | no | no | N/A| | [bloomz](https://github.com/NouamaneTazi/bloomz.cpp) ([binding](https://github.com/go-skynet/bloomz.cpp)) | Bloom | yes | GPT | no | no | N/A | | [rwkv](https://github.com/saharNooby/rwkv.cpp) ([binding](https://github.com/donomii/go-rwkv.cpp)) | rwkv | yes | GPT | no | yes | N/A | | [bert](https://github.com/skeskinen/bert.cpp) ([binding](https://github.com/go-skynet/go-bert.cpp)) | bert | no | Embeddings only | yes | no | N/A | @@ -47,6 +45,7 @@ LocalAI will attempt to automatically load models which are not explicitly confi | 
[tinydream](https://github.com/symisc/tiny-dream#tiny-dreaman-embedded-header-only-stable-diffusion-inference-c-librarypixlabiotiny-dream) | stablediffusion | no | Image | no | no | N/A | | `coqui` | Coqui | no | Audio generation and Voice cloning | no | no | CPU/CUDA | | `petals` | Various GPTs and quantization formats | yes | GPT | no | no | CPU/CUDA | +| `transformers` | Various GPTs and quantization formats | yes | GPT, embeddings | yes | no | CPU/CUDA | Note: any backend name listed above can be used in the `backend` field of the model configuration file (See [the advanced section]({{%relref "docs/advanced" %}})). diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 8c5abdcc..df0aaf2f 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -23,13 +23,11 @@ const ( GoLlamaBackend = "llama" LlamaGGML = "llama-ggml" LLamaCPP = "llama-cpp" - StarcoderBackend = "starcoder" GPTJBackend = "gptj" DollyBackend = "dolly" MPTBackend = "mpt" GPTNeoXBackend = "gptneox" ReplitBackend = "replit" - Gpt2Backend = "gpt2" Gpt4AllLlamaBackend = "gpt4all-llama" Gpt4AllMptBackend = "gpt4all-mpt" Gpt4AllJBackend = "gpt4all-j" @@ -55,11 +53,9 @@ var AutoLoadBackends []string = []string{ GPTNeoXBackend, BertEmbeddingsBackend, GPTJBackend, - Gpt2Backend, DollyBackend, MPTBackend, ReplitBackend, - StarcoderBackend, RwkvBackend, WhisperBackend, StableDiffusionBackend, From 8ace0a9ba7fe093f2aee47bb1f9a3dbddb8631f1 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 4 Feb 2024 22:59:14 +0100 Subject: [PATCH 0036/2895] :arrow_up: Update ggerganov/llama.cpp (#1681) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 03ea7d51..0d591806 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=3c0d25c4756742ebf15ad44700fabc0700c638bd +CPPLLAMA_VERSION?=9392ebd49ea5ae236a55b47cbf6a13247e8a3b8c # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From ea7b33b0d2a285508781d1d36ea4cb98cf28fa83 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 5 Feb 2024 15:59:31 +0100 Subject: [PATCH 0037/2895] Update integrations.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/integrations.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/content/docs/integrations.md b/docs/content/docs/integrations.md index 77a32168..28e71bd5 100644 --- a/docs/content/docs/integrations.md +++ b/docs/content/docs/integrations.md @@ -24,5 +24,6 @@ The list below is a list of software that integrates with LocalAI. - https://github.com/mattermost/openops - https://github.com/charmbracelet/mods - https://github.com/cedriking/spark - +- [Big AGI](https://github.com/enricoros/big-agi) is a powerful web interface entirely running in the browser, supporting LocalAI + Feel free to open up a Pull request (by clicking at the "Edit page" below) to get a page for your project made or if you see a error on one of the pages! 
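The compatibility table above notes that any backend name can be set explicitly in the `backend` field of a model's YAML config. Since the dedicated ggml backends for Falcon, GPT-2 and Starcoder are dropped in favor of llama.cpp, a model that previously auto-loaded one of them can be pinned to the replacement backend instead. A minimal sketch (the model name and `gguf` file below are hypothetical placeholders, not files shipped with LocalAI):

```yaml
# pin a model that used to rely on the dropped starcoder/gpt2 backends to llama.cpp
name: my-starcoder            # arbitrary name to use in API requests
backend: llama-cpp            # explicit backend instead of auto-detection
parameters:
  # relative to the models path; hypothetical file name
  model: starcoder.q4_K_M.gguf
context_size: 1024
```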
From d8b17795d7375a005d213e3db74eb67c14e014b7 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 6 Feb 2024 09:26:01 +0100 Subject: [PATCH 0038/2895] :arrow_up: Update ggerganov/llama.cpp (#1683) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0d591806..6ea5da49 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=9392ebd49ea5ae236a55b47cbf6a13247e8a3b8c +CPPLLAMA_VERSION?=098f6d737b65134cf220d12b9b706e8cfc5e4610 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From fd9d060c94ad17de187597e0042802e19f87bf15 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 6 Feb 2024 15:52:21 +0100 Subject: [PATCH 0039/2895] ci: fix sycl image suffix Signed-off-by: Ettore Di Giacinto --- .github/workflows/image.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 69c7311f..c23cdabf 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -125,28 +125,28 @@ jobs: - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' - tag-suffix: 'sycl-f16-core' + tag-suffix: '-sycl-f16-core' ffmpeg: 'false' image-type: 'core' runs-on: 'arc-runner-set' - build-type: 'sycl_f32' platforms: 'linux/amd64' tag-latest: 'false' - tag-suffix: 'sycl-f32-core' + tag-suffix: '-sycl-f32-core' ffmpeg: 'false' image-type: 'core' runs-on: 'arc-runner-set' - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' - tag-suffix: 'sycl-f16-ffmpeg-core' + tag-suffix: '-sycl-f16-ffmpeg-core' ffmpeg: 'true' image-type: 'core' runs-on: 'arc-runner-set' - build-type: 'sycl_f32' platforms: 'linux/amd64' tag-latest: 'false' - tag-suffix: 'sycl-f32-ffmpeg-core' + tag-suffix: '-sycl-f32-ffmpeg-core' ffmpeg: 'true' image-type: 'core' runs-on: 'arc-runner-set' From d168c7c9dc64a55722d90cf21616644e68a24c28 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 6 Feb 2024 19:42:27 +0100 Subject: [PATCH 0040/2895] ci: cleanup worker before run (#1685) Signed-off-by: Ettore Di Giacinto --- .github/workflows/image_build.yml | 77 ++++++++++++++++--------------- 1 file changed, 41 insertions(+), 36 deletions(-) diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 8358502b..ff9b751f 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -64,42 +64,47 @@ jobs: && sudo apt-get install -y git - name: Checkout uses: actions/checkout@v4 - # - name: Release space from worker - # run: | - # echo "Listing top largest packages" - # pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) - # head -n 30 <<< "${pkgs}" - # echo - # df -h - # echo - # sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true - # sudo apt-get remove --auto-remove android-sdk-platform-tools || true - # sudo apt-get purge --auto-remove android-sdk-platform-tools || true - # sudo rm -rf /usr/local/lib/android - # sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true - # sudo rm -rf /usr/share/dotnet - # sudo apt-get remove -y '^mono-.*' || true - # sudo apt-get remove -y '^ghc-.*' || true - # sudo apt-get remove -y '.*jdk.*|.*jre.*' || true - # sudo apt-get remove -y 'php.*' || true - # sudo 
apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true - # sudo apt-get remove -y '^google-.*' || true - # sudo apt-get remove -y azure-cli || true - # sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true - # sudo apt-get remove -y '^gfortran-.*' || true - # sudo apt-get remove -y microsoft-edge-stable || true - # sudo apt-get remove -y firefox || true - # sudo apt-get remove -y powershell || true - # sudo apt-get remove -y r-base-core || true - # sudo apt-get autoremove -y - # sudo apt-get clean - # echo - # echo "Listing top largest packages" - # pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) - # head -n 30 <<< "${pkgs}" - # echo - # sudo rm -rfv build || true - # df -h + - name: Release space from worker + if: inputs.runs-on == 'ubuntu-latest' + run: | + echo "Listing top largest packages" + pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) + head -n 30 <<< "${pkgs}" + echo + df -h + echo + sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true + sudo apt-get remove --auto-remove android-sdk-platform-tools || true + sudo apt-get purge --auto-remove android-sdk-platform-tools || true + sudo rm -rf /usr/local/lib/android + sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true + sudo rm -rf /usr/share/dotnet + sudo apt-get remove -y '^mono-.*' || true + sudo apt-get remove -y '^ghc-.*' || true + sudo apt-get remove -y '.*jdk.*|.*jre.*' || true + sudo apt-get remove -y 'php.*' || true + sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true + sudo apt-get remove -y '^google-.*' || true + sudo apt-get remove -y azure-cli || true + sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true + sudo apt-get remove -y '^gfortran-.*' || true + sudo apt-get remove -y microsoft-edge-stable || true + sudo apt-get remove -y firefox || true + sudo apt-get remove -y powershell || true + sudo apt-get remove -y r-base-core || true + sudo apt-get autoremove -y + sudo apt-get clean + echo + echo "Listing top largest packages" + pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) + head -n 30 <<< "${pkgs}" + echo + sudo rm -rfv build || true + sudo rm -rf /usr/share/dotnet || true + sudo rm -rf /opt/ghc || true + sudo rm -rf "/usr/local/share/boost" || true + sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true + df -h - name: Docker meta id: meta uses: docker/metadata-action@v5 From f76bb8954b814e98c1e3a2ef7a23118a067d4b64 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 6 Feb 2024 19:42:52 +0100 Subject: [PATCH 0041/2895] fix(Dockerfile): sycl dependencies (#1686) * fix(Dockerfile): sycl dependencies Signed-off-by: Ettore Di Giacinto * fix(ci): cleanup before running bark test --------- Signed-off-by: Ettore Di Giacinto --- .github/workflows/test-extra.yml | 40 ++++++++++++++++++++++++++++++++ Dockerfile | 5 +++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index 172be096..faa480b8 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -171,6 +171,46 @@ jobs: uses: actions/checkout@v4 with: submodules: true + - name: Release space from worker + run: | + echo "Listing top largest packages" + pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == 
"installed"{print $1 "\t" $2}' | sort -nr) + head -n 30 <<< "${pkgs}" + echo + df -h + echo + sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true + sudo apt-get remove --auto-remove android-sdk-platform-tools || true + sudo apt-get purge --auto-remove android-sdk-platform-tools || true + sudo rm -rf /usr/local/lib/android + sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true + sudo rm -rf /usr/share/dotnet + sudo apt-get remove -y '^mono-.*' || true + sudo apt-get remove -y '^ghc-.*' || true + sudo apt-get remove -y '.*jdk.*|.*jre.*' || true + sudo apt-get remove -y 'php.*' || true + sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true + sudo apt-get remove -y '^google-.*' || true + sudo apt-get remove -y azure-cli || true + sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true + sudo apt-get remove -y '^gfortran-.*' || true + sudo apt-get remove -y microsoft-edge-stable || true + sudo apt-get remove -y firefox || true + sudo apt-get remove -y powershell || true + sudo apt-get remove -y r-base-core || true + sudo apt-get autoremove -y + sudo apt-get clean + echo + echo "Listing top largest packages" + pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) + head -n 30 <<< "${pkgs}" + echo + sudo rm -rfv build || true + sudo rm -rf /usr/share/dotnet || true + sudo rm -rf /opt/ghc || true + sudo rm -rf "/usr/local/share/boost" || true + sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true + df -h - name: Dependencies run: | sudo apt-get update diff --git a/Dockerfile b/Dockerfile index f81b5ee3..82cebf46 100644 --- a/Dockerfile +++ b/Dockerfile @@ -42,7 +42,10 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ RUN if [ "${BUILD_TYPE}" = "sycl_f16" ] || [ "${BUILD_TYPE}" = "sycl_f32" ]; then \ wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/163da6e4-56eb-4948-aba3-debcec61c064/l_BaseKit_p_2024.0.1.46_offline.sh && \ sh ./l_BaseKit_p_2024.0.1.46_offline.sh -a -s --eula accept && \ - rm -rf l_BaseKit_p_2024.0.1.46_offline.sh \ + rm -rf l_BaseKit_p_2024.0.1.46_offline.sh && \ + wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \ + echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \ + apt-get update && apt-get install -y intel-oneapi-runtime-opencl && apt-get clean \ ; fi ENV PATH /usr/local/cuda/bin:${PATH} From e23e490455244ea3af2959b76fa43bf0f406e783 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 6 Feb 2024 20:48:29 +0100 Subject: [PATCH 0042/2895] Revert "fix(Dockerfile): sycl dependencies" (#1687) Revert "fix(Dockerfile): sycl dependencies (#1686)" This reverts commit f76bb8954b814e98c1e3a2ef7a23118a067d4b64. 
--- .github/workflows/test-extra.yml | 40 -------------------------------- Dockerfile | 5 +--- 2 files changed, 1 insertion(+), 44 deletions(-) diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index faa480b8..172be096 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -171,46 +171,6 @@ jobs: uses: actions/checkout@v4 with: submodules: true - - name: Release space from worker - run: | - echo "Listing top largest packages" - pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) - head -n 30 <<< "${pkgs}" - echo - df -h - echo - sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true - sudo apt-get remove --auto-remove android-sdk-platform-tools || true - sudo apt-get purge --auto-remove android-sdk-platform-tools || true - sudo rm -rf /usr/local/lib/android - sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true - sudo rm -rf /usr/share/dotnet - sudo apt-get remove -y '^mono-.*' || true - sudo apt-get remove -y '^ghc-.*' || true - sudo apt-get remove -y '.*jdk.*|.*jre.*' || true - sudo apt-get remove -y 'php.*' || true - sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true - sudo apt-get remove -y '^google-.*' || true - sudo apt-get remove -y azure-cli || true - sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true - sudo apt-get remove -y '^gfortran-.*' || true - sudo apt-get remove -y microsoft-edge-stable || true - sudo apt-get remove -y firefox || true - sudo apt-get remove -y powershell || true - sudo apt-get remove -y r-base-core || true - sudo apt-get autoremove -y - sudo apt-get clean - echo - echo "Listing top largest packages" - pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) - head -n 30 <<< "${pkgs}" - echo - sudo rm -rfv build || true - sudo rm -rf /usr/share/dotnet || true - sudo rm -rf /opt/ghc || true - sudo rm -rf "/usr/local/share/boost" || true - sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true - df -h - name: Dependencies run: | sudo apt-get update diff --git a/Dockerfile b/Dockerfile index 82cebf46..f81b5ee3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -42,10 +42,7 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ RUN if [ "${BUILD_TYPE}" = "sycl_f16" ] || [ "${BUILD_TYPE}" = "sycl_f32" ]; then \ wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/163da6e4-56eb-4948-aba3-debcec61c064/l_BaseKit_p_2024.0.1.46_offline.sh && \ sh ./l_BaseKit_p_2024.0.1.46_offline.sh -a -s --eula accept && \ - rm -rf l_BaseKit_p_2024.0.1.46_offline.sh && \ - wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \ - echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \ - apt-get update && apt-get install -y intel-oneapi-runtime-opencl && apt-get clean \ + rm -rf l_BaseKit_p_2024.0.1.46_offline.sh \ ; fi ENV PATH /usr/local/cuda/bin:${PATH} From 37e6974afe671ec3804b211e09927830f43fd843 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 6 Feb 2024 20:49:28 +0100 Subject: [PATCH 0043/2895] ci: fix extra(bark) tests Signed-off-by: Ettore Di Giacinto --- .github/workflows/test-extra.yml | 42 +++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git 
a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index 172be096..b1ecec25 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -167,6 +167,46 @@ jobs: tests-bark: runs-on: ubuntu-latest steps: + - name: Release space from worker + run: | + echo "Listing top largest packages" + pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) + head -n 30 <<< "${pkgs}" + echo + df -h + echo + sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true + sudo apt-get remove --auto-remove android-sdk-platform-tools || true + sudo apt-get purge --auto-remove android-sdk-platform-tools || true + sudo rm -rf /usr/local/lib/android + sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true + sudo rm -rf /usr/share/dotnet + sudo apt-get remove -y '^mono-.*' || true + sudo apt-get remove -y '^ghc-.*' || true + sudo apt-get remove -y '.*jdk.*|.*jre.*' || true + sudo apt-get remove -y 'php.*' || true + sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true + sudo apt-get remove -y '^google-.*' || true + sudo apt-get remove -y azure-cli || true + sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true + sudo apt-get remove -y '^gfortran-.*' || true + sudo apt-get remove -y microsoft-edge-stable || true + sudo apt-get remove -y firefox || true + sudo apt-get remove -y powershell || true + sudo apt-get remove -y r-base-core || true + sudo apt-get autoremove -y + sudo apt-get clean + echo + echo "Listing top largest packages" + pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) + head -n 30 <<< "${pkgs}" + echo + sudo rm -rfv build || true + sudo rm -rf /usr/share/dotnet || true + sudo rm -rf /opt/ghc || true + sudo rm -rf "/usr/local/share/boost" || true + sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true + df -h - name: Clone uses: actions/checkout@v4 with: @@ -274,4 +314,4 @@ jobs: run: | export PATH=$PATH:/opt/conda/bin make -C backend/python/coqui - make -C backend/python/coqui test \ No newline at end of file + make -C backend/python/coqui test From e0632f2ce2b950d1000ee9e38fbba493abcd58ac Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 7 Feb 2024 00:16:52 +0100 Subject: [PATCH 0044/2895] fix(llama.cpp): downgrade to fix sycl build Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6ea5da49..a6890759 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=098f6d737b65134cf220d12b9b706e8cfc5e4610 +CPPLLAMA_VERSION?=1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From d0a6a35b55e76838a5dbbcabcf337a46d87b78c8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 7 Feb 2024 09:40:31 +0100 Subject: [PATCH 0045/2895] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 2d694cc4..25c90406 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) +- Deprecation of old backends: https://github.com/mudler/LocalAI/issues/1651 - Mamba support: https://github.com/mudler/LocalAI/pull/1589 - Start and share models 
with config file: https://github.com/mudler/LocalAI/pull/1522 - 🐸 Coqui: https://github.com/mudler/LocalAI/pull/1489 From ddd21f1644ea8f6aff2e01f34e2b54b16db8964d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 8 Feb 2024 20:12:51 +0100 Subject: [PATCH 0046/2895] feat: Use ubuntu as base for container images, drop deprecated ggml-transformers backends (#1689) * cleanup backends * switch image to ubuntu 22.04 * adapt commands for ubuntu * transformers cleanup * no contrib on ubuntu * Change test model to gguf * ci: disable bark tests (too cpu-intensive) Signed-off-by: Ettore Di Giacinto * cleanup * refinements * use intel base image * Makefile: Add docker targets * Change test model --------- Signed-off-by: Ettore Di Giacinto --- .github/workflows/image-pr.yml | 7 ++ .github/workflows/image.yml | 18 ++++ .github/workflows/image_build.yml | 6 ++ .github/workflows/test-extra.yml | 132 +++++++++++------------ Dockerfile | 29 +++-- Makefile | 67 ++++++------ api/api_test.go | 23 ++-- backend/go/llm/transformers/dolly.go | 44 -------- backend/go/llm/transformers/gpt2.go | 42 -------- backend/go/llm/transformers/gptj.go | 42 -------- backend/go/llm/transformers/gptneox.go | 42 -------- backend/go/llm/transformers/mpt.go | 42 -------- backend/go/llm/transformers/predict.go | 26 ----- backend/go/llm/transformers/replit.go | 42 -------- backend/go/llm/transformers/starcoder.go | 43 -------- entrypoint.sh | 4 - pkg/model/initializers.go | 10 -- tests/models_fixtures/config.yaml | 4 +- tests/models_fixtures/gpt4.yaml | 2 +- tests/models_fixtures/gpt4_2.yaml | 2 +- 20 files changed, 161 insertions(+), 466 deletions(-) delete mode 100644 backend/go/llm/transformers/dolly.go delete mode 100644 backend/go/llm/transformers/gpt2.go delete mode 100644 backend/go/llm/transformers/gptj.go delete mode 100644 backend/go/llm/transformers/gptneox.go delete mode 100644 backend/go/llm/transformers/mpt.go delete mode 100644 backend/go/llm/transformers/predict.go delete mode 100644 backend/go/llm/transformers/replit.go delete mode 100644 backend/go/llm/transformers/starcoder.go diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 8dd699f5..ae8bd070 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -21,6 +21,7 @@ jobs: cuda-minor-version: ${{ matrix.cuda-minor-version }} platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} + base-image: ${{ matrix.base-image }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -39,6 +40,7 @@ jobs: ffmpeg: 'true' image-type: 'extras' runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" @@ -48,6 +50,7 @@ jobs: ffmpeg: 'true' image-type: 'extras' runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" core-image-build: uses: ./.github/workflows/image_build.yml with: @@ -60,6 +63,7 @@ jobs: cuda-minor-version: ${{ matrix.cuda-minor-version }} platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} + base-image: ${{ matrix.base-image }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -75,9 +79,11 @@ jobs: ffmpeg: 'true' image-type: 'core' runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' + base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" tag-suffix: 'sycl-f16-ffmpeg-core' ffmpeg: 'true' image-type: 'core' @@ 
-91,3 +97,4 @@ jobs: ffmpeg: 'true' image-type: 'core' runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index c23cdabf..ac61deec 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -25,6 +25,7 @@ jobs: cuda-minor-version: ${{ matrix.cuda-minor-version }} platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} + base-image: ${{ matrix.base-image }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -44,6 +45,7 @@ jobs: ffmpeg: '' image-type: 'extras' runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" - build-type: '' platforms: 'linux/amd64' tag-latest: 'false' @@ -51,6 +53,7 @@ jobs: ffmpeg: 'true' image-type: 'extras' runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -60,6 +63,7 @@ jobs: ffmpeg: '' image-type: 'extras' runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" @@ -69,6 +73,7 @@ jobs: ffmpeg: '' image-type: 'extras' runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -78,6 +83,7 @@ jobs: ffmpeg: 'true' image-type: 'extras' runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" @@ -87,6 +93,7 @@ jobs: ffmpeg: 'true' image-type: 'extras' runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" - build-type: '' #platforms: 'linux/amd64,linux/arm64' platforms: 'linux/amd64' @@ -94,6 +101,7 @@ jobs: tag-suffix: '' ffmpeg: '' image-type: 'extras' + base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' core-image-build: uses: ./.github/workflows/image_build.yml @@ -107,6 +115,7 @@ jobs: cuda-minor-version: ${{ matrix.cuda-minor-version }} platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} + base-image: ${{ matrix.base-image }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -121,10 +130,12 @@ jobs: tag-suffix: '-ffmpeg-core' ffmpeg: 'true' image-type: 'core' + base-image: "ubuntu:22.04" runs-on: 'ubuntu-latest' - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' + base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" tag-suffix: '-sycl-f16-core' ffmpeg: 'false' image-type: 'core' @@ -132,6 +143,7 @@ jobs: - build-type: 'sycl_f32' platforms: 'linux/amd64' tag-latest: 'false' + base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" tag-suffix: '-sycl-f32-core' ffmpeg: 'false' image-type: 'core' @@ -139,6 +151,7 @@ jobs: - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' + base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" tag-suffix: '-sycl-f16-ffmpeg-core' ffmpeg: 'true' image-type: 'core' @@ -146,6 +159,7 @@ jobs: - build-type: 'sycl_f32' platforms: 'linux/amd64' tag-latest: 'false' + base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" tag-suffix: '-sycl-f32-ffmpeg-core' ffmpeg: 'true' image-type: 'core' @@ -158,6 +172,7 @@ jobs: tag-suffix: '-cublas-cuda11-core' ffmpeg: '' image-type: 'core' + base-image: "ubuntu:22.04" runs-on: 'ubuntu-latest' - build-type: 'cublas' cuda-major-version: "12" @@ -167,6 +182,7 @@ jobs: tag-suffix: '-cublas-cuda12-core' ffmpeg: '' image-type: 'core' + base-image: "ubuntu:22.04" runs-on: 'ubuntu-latest' - build-type: 'cublas' 
cuda-major-version: "11" @@ -177,6 +193,7 @@ jobs: ffmpeg: 'true' image-type: 'core' runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" @@ -186,3 +203,4 @@ jobs: ffmpeg: 'true' image-type: 'core' runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index ff9b751f..a45473b4 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -4,6 +4,11 @@ name: 'build container images (reusable)' on: workflow_call: inputs: + base-image: + description: 'Base image' + required: false + default: '' + type: string build-type: description: 'Build type' default: '' @@ -154,6 +159,7 @@ jobs: CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }} FFMPEG=${{ inputs.ffmpeg }} IMAGE_TYPE=${{ inputs.image-type }} + BASE_IMAGE=${{ inputs.base-image }} context: . file: ./Dockerfile platforms: ${{ inputs.platforms }} diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index b1ecec25..68da2c56 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -164,74 +164,74 @@ jobs: - tests-bark: - runs-on: ubuntu-latest - steps: - - name: Release space from worker - run: | - echo "Listing top largest packages" - pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) - head -n 30 <<< "${pkgs}" - echo - df -h - echo - sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true - sudo apt-get remove --auto-remove android-sdk-platform-tools || true - sudo apt-get purge --auto-remove android-sdk-platform-tools || true - sudo rm -rf /usr/local/lib/android - sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true - sudo rm -rf /usr/share/dotnet - sudo apt-get remove -y '^mono-.*' || true - sudo apt-get remove -y '^ghc-.*' || true - sudo apt-get remove -y '.*jdk.*|.*jre.*' || true - sudo apt-get remove -y 'php.*' || true - sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true - sudo apt-get remove -y '^google-.*' || true - sudo apt-get remove -y azure-cli || true - sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true - sudo apt-get remove -y '^gfortran-.*' || true - sudo apt-get remove -y microsoft-edge-stable || true - sudo apt-get remove -y firefox || true - sudo apt-get remove -y powershell || true - sudo apt-get remove -y r-base-core || true - sudo apt-get autoremove -y - sudo apt-get clean - echo - echo "Listing top largest packages" - pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) - head -n 30 <<< "${pkgs}" - echo - sudo rm -rfv build || true - sudo rm -rf /usr/share/dotnet || true - sudo rm -rf /opt/ghc || true - sudo rm -rf "/usr/local/share/boost" || true - sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true - df -h - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install build-essential ffmpeg - curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ - sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ - gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ - sudo /bin/bash -c 'echo "deb [arch=amd64 
signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \ - sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ - sudo apt-get update && \ - sudo apt-get install -y conda - sudo apt-get install -y ca-certificates cmake curl patch - sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 + # tests-bark: + # runs-on: ubuntu-latest + # steps: + # - name: Release space from worker + # run: | + # echo "Listing top largest packages" + # pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) + # head -n 30 <<< "${pkgs}" + # echo + # df -h + # echo + # sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true + # sudo apt-get remove --auto-remove android-sdk-platform-tools || true + # sudo apt-get purge --auto-remove android-sdk-platform-tools || true + # sudo rm -rf /usr/local/lib/android + # sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true + # sudo rm -rf /usr/share/dotnet + # sudo apt-get remove -y '^mono-.*' || true + # sudo apt-get remove -y '^ghc-.*' || true + # sudo apt-get remove -y '.*jdk.*|.*jre.*' || true + # sudo apt-get remove -y 'php.*' || true + # sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true + # sudo apt-get remove -y '^google-.*' || true + # sudo apt-get remove -y azure-cli || true + # sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true + # sudo apt-get remove -y '^gfortran-.*' || true + # sudo apt-get remove -y microsoft-edge-stable || true + # sudo apt-get remove -y firefox || true + # sudo apt-get remove -y powershell || true + # sudo apt-get remove -y r-base-core || true + # sudo apt-get autoremove -y + # sudo apt-get clean + # echo + # echo "Listing top largest packages" + # pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) + # head -n 30 <<< "${pkgs}" + # echo + # sudo rm -rfv build || true + # sudo rm -rf /usr/share/dotnet || true + # sudo rm -rf /opt/ghc || true + # sudo rm -rf "/usr/local/share/boost" || true + # sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true + # df -h + # - name: Clone + # uses: actions/checkout@v4 + # with: + # submodules: true + # - name: Dependencies + # run: | + # sudo apt-get update + # sudo apt-get install build-essential ffmpeg + # curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ + # sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ + # gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ + # sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \ + # sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ + # sudo apt-get update && \ + # sudo apt-get install -y conda + # sudo apt-get install -y ca-certificates cmake curl patch + # sudo apt-get install -y 
libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 - sudo rm -rfv /usr/bin/conda || true + # sudo rm -rfv /usr/bin/conda || true - - name: Test bark - run: | - export PATH=$PATH:/opt/conda/bin - make -C backend/python/bark - make -C backend/python/bark test + # - name: Test bark + # run: | + # export PATH=$PATH:/opt/conda/bin + # make -C backend/python/bark + # make -C backend/python/bark test # Below tests needs GPU. Commented out for now diff --git a/Dockerfile b/Dockerfile index f81b5ee3..5cee6a23 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,9 +1,11 @@ -ARG GO_VERSION=1.21-bullseye +ARG GO_VERSION=1.21 ARG IMAGE_TYPE=extras +ARG BASE_IMAGE=ubuntu:22.04 + # extras or core +FROM ${BASE_IMAGE} as requirements-core -FROM golang:$GO_VERSION as requirements-core - +ARG GO_VERSION=1.21.7 ARG BUILD_TYPE ARG CUDA_MAJOR_VERSION=11 ARG CUDA_MINOR_VERSION=7 @@ -11,14 +13,17 @@ ARG TARGETARCH ARG TARGETVARIANT ENV BUILD_TYPE=${BUILD_TYPE} - +ENV DEBIAN_FRONTEND=noninteractive ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh" ARG GO_TAGS="stablediffusion tinydream tts" RUN apt-get update && \ - apt-get install -y ca-certificates curl patch pip cmake && apt-get clean + apt-get install -y ca-certificates curl patch pip cmake git && apt-get clean +# Install Go +RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -v -C /usr/local -xz +ENV PATH $PATH:/usr/local/go/bin COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/ RUN update-ca-certificates @@ -30,21 +35,13 @@ RUN echo "Target Variant: $TARGETVARIANT" # CuBLAS requirements RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ apt-get install -y software-properties-common && \ - apt-add-repository contrib && \ - curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \ - dpkg -i cuda-keyring_1.0-1_all.deb && \ - rm -f cuda-keyring_1.0-1_all.deb && \ + curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \ + dpkg -i cuda-keyring_1.1-1_all.deb && \ + rm -f cuda-keyring_1.1-1_all.deb && \ apt-get update && \ apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \ ; fi -# oneapi requirements -RUN if [ "${BUILD_TYPE}" = "sycl_f16" ] || [ "${BUILD_TYPE}" = "sycl_f32" ]; then \ - wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/163da6e4-56eb-4948-aba3-debcec61c064/l_BaseKit_p_2024.0.1.46_offline.sh && \ - sh ./l_BaseKit_p_2024.0.1.46_offline.sh -a -s --eula accept && \ - rm -rf l_BaseKit_p_2024.0.1.46_offline.sh \ - ; fi - ENV PATH /usr/local/cuda/bin:${PATH} # OpenBLAS 
requirements and stable diffusion diff --git a/Makefile b/Makefile index a6890759..51c941d4 100644 --- a/Makefile +++ b/Makefile @@ -14,9 +14,6 @@ CPPLLAMA_VERSION?=1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8 -# go-ggml-transformers version -GOGGMLTRANSFORMERS_VERSION?=ffb09d7dd71e2cbc6c5d7d05357d230eea6f369a - # go-rwkv version RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=633c5a3485c403cb2520693dc0991a25dace9f0f @@ -145,7 +142,16 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts) OPTIONAL_GRPC+=backend-assets/grpc/piper endif -ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-ggml backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC) +ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface +ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings +ALL_GRPC_BACKENDS+=backend-assets/grpc/llama +ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp +ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml +ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all +ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv +ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper +ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC) + GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC) # If empty, then we build all @@ -217,14 +223,6 @@ backend-assets/espeak-ng-data: sources/go-piper sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a -## CEREBRAS GPT -sources/go-ggml-transformers: - git clone --recurse-submodules https://github.com/go-skynet/go-ggml-transformers.cpp sources/go-ggml-transformers - cd sources/go-ggml-transformers && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1 - -sources/go-ggml-transformers/libtransformers.a: sources/go-ggml-transformers - $(MAKE) -C sources/go-ggml-transformers BUILD_TYPE=$(BUILD_TYPE) libtransformers.a - sources/whisper.cpp: git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1 @@ -252,12 +250,11 @@ sources/go-piper/libpiper_binding.a: sources/go-piper backend/cpp/llama/llama.cpp: LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp -get-sources: backend/cpp/llama/llama.cpp sources/go-llama sources/go-llama-ggml sources/go-ggml-transformers sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream +get-sources: backend/cpp/llama/llama.cpp sources/go-llama sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream touch $@ replace: $(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang - $(GOCMD) mod edit -replace github.com/go-skynet/go-ggml-transformers.cpp=$(CURDIR)/sources/go-ggml-transformers $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp 
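# Illustrative aside, not part of the patch: each recipe line in this target
# rewrites go.mod so the named module resolves to a local checkout instead of
# the published release, e.g.:
#
#   go mod edit -replace github.com/donomii/go-rwkv.cpp=./sources/go-rwkv
#
# With the go-ggml-transformers backends deleted in this commit, its replace
# directive is dropped as well.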
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go @@ -276,7 +273,6 @@ rebuild: ## Rebuilds the project $(MAKE) -C sources/go-llama clean $(MAKE) -C sources/go-llama-ggml clean $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean - $(MAKE) -C sources/go-ggml-transformers clean $(MAKE) -C sources/go-rwkv clean $(MAKE) -C sources/whisper.cpp clean $(MAKE) -C sources/go-stable-diffusion clean @@ -321,7 +317,7 @@ run: prepare ## run local-ai test-models/testmodel: mkdir test-models mkdir test-dir - wget -q https://huggingface.co/nnakasato/ggml-model-test/resolve/main/ggml-model-q4.bin -O test-models/testmodel + wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav @@ -505,26 +501,6 @@ backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all sources/ CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/ -backend-assets/grpc/dolly: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/dolly ./backend/go/llm/dolly/ - -backend-assets/grpc/gptj: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptj ./backend/go/llm/gptj/ - -backend-assets/grpc/gptneox: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptneox ./backend/go/llm/gptneox/ - -backend-assets/grpc/mpt: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/mpt ./backend/go/llm/mpt/ - -backend-assets/grpc/replit: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/replit ./backend/go/llm/replit/ - backend-assets/grpc/rwkv: backend-assets/grpc sources/go-rwkv/librwkv.a CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv @@ -556,3 +532,22 @@ 
backend-assets/grpc/whisper: backend-assets/grpc sources/whisper.cpp/libwhisper. $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/ grpcs: prepare $(GRPC_BACKENDS) + +DOCKER_IMAGE?=local-ai +IMAGE_TYPE?=core +BASE_IMAGE?=ubuntu:22.04 + +docker: + docker build \ + --build-arg BASE_IMAGE=$(BASE_IMAGE) \ + --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ + --build-arg GO_TAGS=$(GO_TAGS) \ + --build-arg BUILD_TYPE=$(BUILD_TYPE) \ + -t $(DOCKER_IMAGE) . + +docker-image-intel: + docker build \ + --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \ + --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ + --build-arg GO_TAGS="none" \ + --build-arg BUILD_TYPE=sycl_f16 -t $(DOCKER_IMAGE) . \ No newline at end of file diff --git a/api/api_test.go b/api/api_test.go index 491a56b5..04d2d6fe 100644 --- a/api/api_test.go +++ b/api/api_test.go @@ -29,6 +29,15 @@ import ( "github.com/sashabaranov/go-openai/jsonschema" ) +const testPrompt = `### System: +You are an AI assistant that follows instruction extremely well. Help as much as you can. + +### User: + +Can you help rephrasing sentences? + +### Response:` + type modelApplyRequest struct { ID string `json:"id"` URL string `json:"url"` @@ -629,28 +638,28 @@ var _ = Describe("API test", func() { Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8? }) It("can generate completions", func() { - resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"}) + resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: testPrompt}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Text).ToNot(BeEmpty()) }) It("can generate chat completions ", func() { - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}}) + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) }) It("can generate completions from model configs", func() { - resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: "abcdedfghikl"}) + resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: testPrompt}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Text).ToNot(BeEmpty()) }) It("can generate chat completions from model configs", func() { - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}}) + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) @@ -658,7 +667,7 @@ var _ = Describe("API test", func() { It("returns 
errors", func() { backends := len(model.AutoLoadBackends) + 1 // +1 for huggingface - _, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"}) + _, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: testPrompt}) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring(fmt.Sprintf("error, status code: 500, message: could not load model - all backends returned error: %d errors occurred:", backends))) }) @@ -834,13 +843,13 @@ var _ = Describe("API test", func() { app.Shutdown() }) It("can generate chat completions from config file (list1)", func() { - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "abcdedfghikl"}}}) + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) }) It("can generate chat completions from config file (list2)", func() { - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "abcdedfghikl"}}}) + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) diff --git a/backend/go/llm/transformers/dolly.go b/backend/go/llm/transformers/dolly.go deleted file mode 100644 index b3579b04..00000000 --- a/backend/go/llm/transformers/dolly.go +++ /dev/null @@ -1,44 +0,0 @@ -package transformers - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - transformers "github.com/go-skynet/go-ggml-transformers.cpp" -) - -type Dolly struct { - base.SingleThread - - dolly *transformers.Dolly -} - -func (llm *Dolly) Load(opts *pb.ModelOptions) error { - model, err := transformers.NewDolly(opts.ModelFile) - llm.dolly = model - return err -} - -func (llm *Dolly) Predict(opts *pb.PredictOptions) (string, error) { - return llm.dolly.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -// fallback to Predict -func (llm *Dolly) PredictStream(opts *pb.PredictOptions, results chan string) error { - - go func() { - res, err := llm.dolly.Predict(opts.Prompt, buildPredictOptions(opts)...) 
- - if err != nil { - fmt.Println("err: ", err) - } - results <- res - close(results) - }() - - return nil -} diff --git a/backend/go/llm/transformers/gpt2.go b/backend/go/llm/transformers/gpt2.go deleted file mode 100644 index ab162a76..00000000 --- a/backend/go/llm/transformers/gpt2.go +++ /dev/null @@ -1,42 +0,0 @@ -package transformers - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - transformers "github.com/go-skynet/go-ggml-transformers.cpp" -) - -type GPT2 struct { - base.SingleThread - - gpt2 *transformers.GPT2 -} - -func (llm *GPT2) Load(opts *pb.ModelOptions) error { - model, err := transformers.New(opts.ModelFile) - llm.gpt2 = model - return err -} - -func (llm *GPT2) Predict(opts *pb.PredictOptions) (string, error) { - return llm.gpt2.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -// fallback to Predict -func (llm *GPT2) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - res, err := llm.gpt2.Predict(opts.Prompt, buildPredictOptions(opts)...) - - if err != nil { - fmt.Println("err: ", err) - } - results <- res - close(results) - }() - return nil -} diff --git a/backend/go/llm/transformers/gptj.go b/backend/go/llm/transformers/gptj.go deleted file mode 100644 index f00f1044..00000000 --- a/backend/go/llm/transformers/gptj.go +++ /dev/null @@ -1,42 +0,0 @@ -package transformers - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - transformers "github.com/go-skynet/go-ggml-transformers.cpp" -) - -type GPTJ struct { - base.SingleThread - - gptj *transformers.GPTJ -} - -func (llm *GPTJ) Load(opts *pb.ModelOptions) error { - model, err := transformers.NewGPTJ(opts.ModelFile) - llm.gptj = model - return err -} - -func (llm *GPTJ) Predict(opts *pb.PredictOptions) (string, error) { - return llm.gptj.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -// fallback to Predict -func (llm *GPTJ) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - res, err := llm.gptj.Predict(opts.Prompt, buildPredictOptions(opts)...) 
- - if err != nil { - fmt.Println("err: ", err) - } - results <- res - close(results) - }() - return nil -} diff --git a/backend/go/llm/transformers/gptneox.go b/backend/go/llm/transformers/gptneox.go deleted file mode 100644 index a06d910e..00000000 --- a/backend/go/llm/transformers/gptneox.go +++ /dev/null @@ -1,42 +0,0 @@ -package transformers - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - transformers "github.com/go-skynet/go-ggml-transformers.cpp" -) - -type GPTNeoX struct { - base.SingleThread - - gptneox *transformers.GPTNeoX -} - -func (llm *GPTNeoX) Load(opts *pb.ModelOptions) error { - model, err := transformers.NewGPTNeoX(opts.ModelFile) - llm.gptneox = model - return err -} - -func (llm *GPTNeoX) Predict(opts *pb.PredictOptions) (string, error) { - return llm.gptneox.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -// fallback to Predict -func (llm *GPTNeoX) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - res, err := llm.gptneox.Predict(opts.Prompt, buildPredictOptions(opts)...) - - if err != nil { - fmt.Println("err: ", err) - } - results <- res - close(results) - }() - return nil -} diff --git a/backend/go/llm/transformers/mpt.go b/backend/go/llm/transformers/mpt.go deleted file mode 100644 index f6e0a143..00000000 --- a/backend/go/llm/transformers/mpt.go +++ /dev/null @@ -1,42 +0,0 @@ -package transformers - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - transformers "github.com/go-skynet/go-ggml-transformers.cpp" -) - -type MPT struct { - base.SingleThread - - mpt *transformers.MPT -} - -func (llm *MPT) Load(opts *pb.ModelOptions) error { - model, err := transformers.NewMPT(opts.ModelFile) - llm.mpt = model - return err -} - -func (llm *MPT) Predict(opts *pb.PredictOptions) (string, error) { - return llm.mpt.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -// fallback to Predict -func (llm *MPT) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - res, err := llm.mpt.Predict(opts.Prompt, buildPredictOptions(opts)...) 
- - if err != nil { - fmt.Println("err: ", err) - } - results <- res - close(results) - }() - return nil -} diff --git a/backend/go/llm/transformers/predict.go b/backend/go/llm/transformers/predict.go deleted file mode 100644 index 861d1196..00000000 --- a/backend/go/llm/transformers/predict.go +++ /dev/null @@ -1,26 +0,0 @@ -package transformers - -import ( - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - transformers "github.com/go-skynet/go-ggml-transformers.cpp" -) - -func buildPredictOptions(opts *pb.PredictOptions) []transformers.PredictOption { - predictOptions := []transformers.PredictOption{ - transformers.SetTemperature(float64(opts.Temperature)), - transformers.SetTopP(float64(opts.TopP)), - transformers.SetTopK(int(opts.TopK)), - transformers.SetTokens(int(opts.Tokens)), - transformers.SetThreads(int(opts.Threads)), - } - - if opts.Batch != 0 { - predictOptions = append(predictOptions, transformers.SetBatch(int(opts.Batch))) - } - - if opts.Seed != 0 { - predictOptions = append(predictOptions, transformers.SetSeed(int(opts.Seed))) - } - - return predictOptions -} diff --git a/backend/go/llm/transformers/replit.go b/backend/go/llm/transformers/replit.go deleted file mode 100644 index a979edcb..00000000 --- a/backend/go/llm/transformers/replit.go +++ /dev/null @@ -1,42 +0,0 @@ -package transformers - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - transformers "github.com/go-skynet/go-ggml-transformers.cpp" -) - -type Replit struct { - base.SingleThread - - replit *transformers.Replit -} - -func (llm *Replit) Load(opts *pb.ModelOptions) error { - model, err := transformers.NewReplit(opts.ModelFile) - llm.replit = model - return err -} - -func (llm *Replit) Predict(opts *pb.PredictOptions) (string, error) { - return llm.replit.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -// fallback to Predict -func (llm *Replit) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - res, err := llm.replit.Predict(opts.Prompt, buildPredictOptions(opts)...) - - if err != nil { - fmt.Println("err: ", err) - } - results <- res - close(results) - }() - return nil -} diff --git a/backend/go/llm/transformers/starcoder.go b/backend/go/llm/transformers/starcoder.go deleted file mode 100644 index 25a758a0..00000000 --- a/backend/go/llm/transformers/starcoder.go +++ /dev/null @@ -1,43 +0,0 @@ -package transformers - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - transformers "github.com/go-skynet/go-ggml-transformers.cpp" -) - -type Starcoder struct { - base.SingleThread - - starcoder *transformers.Starcoder -} - -func (llm *Starcoder) Load(opts *pb.ModelOptions) error { - model, err := transformers.NewStarcoder(opts.ModelFile) - llm.starcoder = model - return err -} - -func (llm *Starcoder) Predict(opts *pb.PredictOptions) (string, error) { - return llm.starcoder.Predict(opts.Prompt, buildPredictOptions(opts)...) 
-} - -// fallback to Predict -func (llm *Starcoder) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - res, err := llm.starcoder.Predict(opts.Prompt, buildPredictOptions(opts)...) - - if err != nil { - fmt.Println("err: ", err) - } - results <- res - close(results) - }() - - return nil -} diff --git a/entrypoint.sh b/entrypoint.sh index ae1976af..05f67128 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -13,10 +13,6 @@ if [ -n "$EXTRA_BACKENDS" ]; then done fi -if [ -e "/opt/intel/oneapi/setvars.sh" ]; then - source /opt/intel/oneapi/setvars.sh -fi - if [ "$REBUILD" != "false" ]; then rm -rf ./local-ai make build -j${BUILD_PARALLELISM:-1} diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index df0aaf2f..fce44fe1 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -23,11 +23,6 @@ const ( GoLlamaBackend = "llama" LlamaGGML = "llama-ggml" LLamaCPP = "llama-cpp" - GPTJBackend = "gptj" - DollyBackend = "dolly" - MPTBackend = "mpt" - GPTNeoXBackend = "gptneox" - ReplitBackend = "replit" Gpt4AllLlamaBackend = "gpt4all-llama" Gpt4AllMptBackend = "gpt4all-mpt" Gpt4AllJBackend = "gpt4all-j" @@ -50,12 +45,7 @@ var AutoLoadBackends []string = []string{ LlamaGGML, GoLlamaBackend, Gpt4All, - GPTNeoXBackend, BertEmbeddingsBackend, - GPTJBackend, - DollyBackend, - MPTBackend, - ReplitBackend, RwkvBackend, WhisperBackend, StableDiffusionBackend, diff --git a/tests/models_fixtures/config.yaml b/tests/models_fixtures/config.yaml index 3deabf9d..749d1699 100644 --- a/tests/models_fixtures/config.yaml +++ b/tests/models_fixtures/config.yaml @@ -4,7 +4,7 @@ top_p: 80 top_k: 0.9 temperature: 0.1 - context_size: 10 + context_size: 200 stopwords: - "HUMAN:" - "### Response:" @@ -20,7 +20,7 @@ top_k: 0.9 temperature: 0.1 model: testmodel - context_size: 10 + context_size: 200 stopwords: - "HUMAN:" - "### Response:" diff --git a/tests/models_fixtures/gpt4.yaml b/tests/models_fixtures/gpt4.yaml index 77b72b30..652a407c 100644 --- a/tests/models_fixtures/gpt4.yaml +++ b/tests/models_fixtures/gpt4.yaml @@ -4,7 +4,7 @@ parameters: top_p: 80 top_k: 0.9 temperature: 0.1 -context_size: 10 +context_size: 200 stopwords: - "HUMAN:" - "### Response:" diff --git a/tests/models_fixtures/gpt4_2.yaml b/tests/models_fixtures/gpt4_2.yaml index 62d9fdbc..904693ca 100644 --- a/tests/models_fixtures/gpt4_2.yaml +++ b/tests/models_fixtures/gpt4_2.yaml @@ -4,7 +4,7 @@ parameters: top_p: 80 top_k: 0.9 temperature: 0.1 -context_size: 10 +context_size: 200 stopwords: - "HUMAN:" - "### Response:" From f1f60359672226467cafdf38b4ef6e4c82cb75b9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 8 Feb 2024 20:39:00 +0100 Subject: [PATCH 0047/2895] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 25c90406..ab079a63 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) +- Intel GPU support (sycl): https://github.com/mudler/LocalAI/issues/1653 - Deprecation of old backends: https://github.com/mudler/LocalAI/issues/1651 - Mamba support: https://github.com/mudler/LocalAI/pull/1589 - Start and share models with config file: https://github.com/mudler/LocalAI/pull/1522 From fc8423392f908c2802d1e46353bf20037ad96a76 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 9 Feb 2024 00:02:23 +0100 Subject: [PATCH 0048/2895] :arrow_up: 
Update ggerganov/llama.cpp (#1688) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 51c941d4..4c96f0cd 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915 +CPPLLAMA_VERSION?=6e99f2a04f1871d637dd77eb4d81de31a5510253 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 3875e5e0e52d63f50f7b7b24d21b4699c4e370da Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 9 Feb 2024 00:03:07 +0100 Subject: [PATCH 0049/2895] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ab079a63..fa875e5a 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl For a detailed step-by-step introduction, refer to the [Getting Started](https://localai.io/basics/getting_started/index.html) guide. For those in a hurry, here's a straightforward one-liner to launch a LocalAI instance with [phi-2](https://huggingface.co/microsoft/phi-2) using `docker`: ``` -docker run -ti -p 8080:8080 localai/localai:v2.5.1-ffmpeg-core phi-2 +docker run -ti -p 8080:8080 localai/localai:v2.7.0-ffmpeg-core phi-2 ``` ## 🚀 [Features](https://localai.io/features/) From 31969679956755439cf789a7459c2ae7f68272c5 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 9 Feb 2024 22:50:34 +0100 Subject: [PATCH 0050/2895] :arrow_up: Update ggerganov/llama.cpp (#1691) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4c96f0cd..96a1d1b6 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=6e99f2a04f1871d637dd77eb4d81de31a5510253 +CPPLLAMA_VERSION?=4b7b38bef5addbd31f453871d79647fbae6bec8a # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From ef1306f703769d5c716360f9ebb4eb67e2ae402d Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 9 Feb 2024 22:59:15 +0100 Subject: [PATCH 0051/2895] :arrow_up: Update mudler/go-stable-diffusion (#1674) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 96a1d1b6..084e75da 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d PIPER_VERSION?=d6b6275ba037dabdba4a8b65dfdf6b2a73a67f07 # stablediffusion version -STABLEDIFFUSION_VERSION?=902db5f066fd137697e3b69d0fa10d4782bd2c2f +STABLEDIFFUSION_VERSION?=d5d2be8e7e395c2d73ceef61e6fe8d240f2cd831 # tinydream version TINYDREAM_VERSION?=772a9c0d9aaf768290e63cca3c904fe69faf677a From 081bd07fd19ae24b59871085f4d462f15283e261 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 10 Feb 2024 21:33:14 +0100 Subject: [PATCH 0052/2895] :arrow_up: Update docs version mudler/LocalAI (#1693) Signed-off-by: GitHub Co-authored-by: mudler --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/docs/data/version.json b/docs/data/version.json index dad9e122..f5a5a75c 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.7.0" + "version": "v2.8.0" } From 53dbe36f32a0f7e1d4806217501be994525e075c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 10 Feb 2024 21:37:03 +0100 Subject: [PATCH 0053/2895] feat(tts): respect YAMLs config file, add sycl docs/examples (#1692) * feat(refactor): refactor config and input reading * feat(tts): read config file for TTS * examples(kubernetes): Add simple deployment example * examples(kubernetes): Add simple deployment for intel arc * docs(sycl): add sycl example * feat(tts): do not always pick a first model * fixups to run vall-e-x on container * Correctly resolve backend --- Dockerfile | 2 +- api/backend/tts.go | 7 +- api/config/config.go | 54 +++++++++++ api/ctx/fiber.go | 43 +++++++++ api/localai/localai.go | 23 ++++- api/openai/chat.go | 4 +- api/openai/completion.go | 4 +- api/openai/edit.go | 4 +- api/openai/embeddings.go | 4 +- api/openai/image.go | 4 +- api/openai/request.go | 89 ++----------------- api/openai/transcription.go | 4 +- backend/python/vall-e-x/install.sh | 3 + .../content/docs/features/GPU-acceleration.md | 15 +++- examples/kubernetes/deployment-intel-arc.yaml | 68 ++++++++++++++ examples/kubernetes/deployment.yaml | 65 ++++++++++++++ main.go | 2 +- 17 files changed, 297 insertions(+), 98 deletions(-) create mode 100644 api/ctx/fiber.go create mode 100644 examples/kubernetes/deployment-intel-arc.yaml create mode 100644 examples/kubernetes/deployment.yaml diff --git a/Dockerfile b/Dockerfile index 5cee6a23..6c5e2745 100644 --- a/Dockerfile +++ b/Dockerfile @@ -39,7 +39,7 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ dpkg -i cuda-keyring_1.1-1_all.deb && \ rm -f cuda-keyring_1.1-1_all.deb && \ apt-get update && \ - apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \ + apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \ ; fi ENV PATH /usr/local/cuda/bin:${PATH} diff --git a/api/backend/tts.go b/api/backend/tts.go index ae8f53ee..6e5ffcc0 100644 --- a/api/backend/tts.go +++ b/api/backend/tts.go @@ -7,6 +7,7 @@ import ( "path/filepath" api_config "github.com/go-skynet/LocalAI/api/config" + config "github.com/go-skynet/LocalAI/api/config" "github.com/go-skynet/LocalAI/api/options" "github.com/go-skynet/LocalAI/pkg/grpc/proto" model "github.com/go-skynet/LocalAI/pkg/model" @@ -29,16 +30,20 @@ func generateUniqueFileName(dir, baseName, ext string) string { } } -func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *options.Option) (string, *proto.Result, error) { +func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *options.Option, c config.Config) (string, *proto.Result, error) { bb := backend if bb == "" { bb = model.PiperBackend } + + grpcOpts := gRPCModelOpts(c) + opts := modelOpts(api_config.Config{}, o, []model.Option{ model.WithBackendString(bb), model.WithModel(modelFile), model.WithContext(o.Context), 
model.WithAssetDir(o.AssetsDestination), + model.WithLoadGRPCLoadModelOpts(grpcOpts), }) piperModel, err := o.Loader.BackendLoader(opts...) if err != nil { diff --git a/api/config/config.go b/api/config/config.go index 1b27b574..48d1b791 100644 --- a/api/config/config.go +++ b/api/config/config.go @@ -183,6 +183,60 @@ func (c *Config) FunctionToCall() string { return c.functionCallNameString } +// Load a config file for a model +func Load(modelName, modelPath string, cm *ConfigLoader, debug bool, threads, ctx int, f16 bool) (*Config, error) { + // Load a config file if present after the model name + modelConfig := filepath.Join(modelPath, modelName+".yaml") + + var cfg *Config + + defaults := func() { + cfg = DefaultConfig(modelName) + cfg.ContextSize = ctx + cfg.Threads = threads + cfg.F16 = f16 + cfg.Debug = debug + } + + cfgExisting, exists := cm.GetConfig(modelName) + if !exists { + if _, err := os.Stat(modelConfig); err == nil { + if err := cm.LoadConfig(modelConfig); err != nil { + return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error()) + } + cfgExisting, exists = cm.GetConfig(modelName) + if exists { + cfg = &cfgExisting + } else { + defaults() + } + } else { + defaults() + } + } else { + cfg = &cfgExisting + } + + // Set the parameters for the language model prediction + //updateConfig(cfg, input) + + // Don't allow 0 as setting + if cfg.Threads == 0 { + if threads != 0 { + cfg.Threads = threads + } else { + cfg.Threads = 4 + } + } + + // Enforce debug flag if passed from CLI + if debug { + cfg.Debug = true + } + + return cfg, nil +} + func defaultPredictOptions(modelFile string) PredictionOptions { return PredictionOptions{ TopP: 0.7, diff --git a/api/ctx/fiber.go b/api/ctx/fiber.go new file mode 100644 index 00000000..ffb63111 --- /dev/null +++ b/api/ctx/fiber.go @@ -0,0 +1,43 @@ +package fiberContext + +import ( + "fmt" + "strings" + + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" + "github.com/rs/zerolog/log" +) + +// ModelFromContext returns the model from the context +// If no model is specified, it will take the first available +// Takes a model string as input which should be the one received from the user request. +// It returns the model name resolved from the context and an error if any. 
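// Usage sketch (illustrative annotation, not part of this patch): callers such
// as readRequest later in this commit resolve the model before any config is
// loaded, letting the first installed model act as a fallback when the request
// omits one:
//
//	modelFile, err := fiberContext.ModelFromContext(c, o.Loader, input.Model, true)
//	if err != nil {
//		return fmt.Errorf("no model specified and none installed: %w", err)
//	}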
+func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) { + if ctx.Params("model") != "" { + modelInput = ctx.Params("model") + } + + // Set model from bearer token, if available + bearer := strings.TrimLeft(ctx.Get("authorization"), "Bearer ") + bearerExists := bearer != "" && loader.ExistsInModelPath(bearer) + + // If no model was specified, take the first available + if modelInput == "" && !bearerExists && firstModel { + models, _ := loader.ListModels() + if len(models) > 0 { + modelInput = models[0] + log.Debug().Msgf("No model specified, using: %s", modelInput) + } else { + log.Debug().Msgf("No model specified, returning error") + return "", fmt.Errorf("no model specified") + } + } + + // If a model is found in bearer token takes precedence + if bearerExists { + log.Debug().Msgf("Using model from bearer token: %s", bearer) + modelInput = bearer + } + return modelInput, nil +} diff --git a/api/localai/localai.go b/api/localai/localai.go index c9aee2ae..7774ca47 100644 --- a/api/localai/localai.go +++ b/api/localai/localai.go @@ -3,6 +3,8 @@ package localai import ( "github.com/go-skynet/LocalAI/api/backend" config "github.com/go-skynet/LocalAI/api/config" + fiberContext "github.com/go-skynet/LocalAI/api/ctx" + "github.com/rs/zerolog/log" "github.com/go-skynet/LocalAI/api/options" "github.com/gofiber/fiber/v2" @@ -18,12 +20,31 @@ func TTSEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) return func(c *fiber.Ctx) error { input := new(TTSRequest) + // Get input data from the request body if err := c.BodyParser(input); err != nil { return err } - filePath, _, err := backend.ModelTTS(input.Backend, input.Input, input.Model, o.Loader, o) + modelFile, err := fiberContext.ModelFromContext(c, o.Loader, input.Model, false) + if err != nil { + modelFile = input.Model + log.Warn().Msgf("Model not found in context: %s", input.Model) + } + cfg, err := config.Load(modelFile, o.Loader.ModelPath, cm, false, 0, 0, false) + if err != nil { + modelFile = input.Model + log.Warn().Msgf("Model not found in context: %s", input.Model) + } else { + modelFile = cfg.Model + } + log.Debug().Msgf("Request for model: %s", modelFile) + + if input.Backend != "" { + cfg.Backend = input.Input + } + + filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, o.Loader, o, *cfg) if err != nil { return err } diff --git a/api/openai/chat.go b/api/openai/chat.go index 02bf6149..819cd6b2 100644 --- a/api/openai/chat.go +++ b/api/openai/chat.go @@ -58,12 +58,12 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) return func(c *fiber.Ctx) error { processFunctions := false funcs := grammar.Functions{} - modelFile, input, err := readInput(c, o, true) + modelFile, input, err := readRequest(c, o, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16) + config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } diff --git a/api/openai/completion.go b/api/openai/completion.go index c0607632..b098451d 100644 --- a/api/openai/completion.go +++ b/api/openai/completion.go @@ -53,14 +53,14 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe } return func(c *fiber.Ctx) error { - modelFile, 
input, err := readInput(c, o, true) + modelFile, input, err := readRequest(c, o, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } log.Debug().Msgf("`input`: %+v", input) - config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16) + config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } diff --git a/api/openai/edit.go b/api/openai/edit.go index 888b9db7..16679ae5 100644 --- a/api/openai/edit.go +++ b/api/openai/edit.go @@ -18,12 +18,12 @@ import ( func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - modelFile, input, err := readInput(c, o, true) + modelFile, input, err := readRequest(c, o, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16) + config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } diff --git a/api/openai/embeddings.go b/api/openai/embeddings.go index 15e31e92..44feb373 100644 --- a/api/openai/embeddings.go +++ b/api/openai/embeddings.go @@ -18,12 +18,12 @@ import ( // https://platform.openai.com/docs/api-reference/embeddings func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - model, input, err := readInput(c, o, true) + model, input, err := readRequest(c, o, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - config, input, err := readConfig(model, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16) + config, input, err := mergeRequestWithConfig(model, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } diff --git a/api/openai/image.go b/api/openai/image.go index 3e4bc349..07f028f0 100644 --- a/api/openai/image.go +++ b/api/openai/image.go @@ -61,7 +61,7 @@ func downloadFile(url string) (string, error) { */ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - m, input, err := readInput(c, o, false) + m, input, err := readRequest(c, o, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } @@ -71,7 +71,7 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx } log.Debug().Msgf("Loading model: %+v", m) - config, input, err := readConfig(m, input, cm, o.Loader, o.Debug, 0, 0, false) + config, input, err := mergeRequestWithConfig(m, input, cm, o.Loader, o.Debug, 0, 0, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } diff --git a/api/openai/request.go b/api/openai/request.go index cc15fe40..382a930e 100644 --- a/api/openai/request.go +++ b/api/openai/request.go @@ -7,11 +7,10 @@ import ( "fmt" "io/ioutil" "net/http" - "os" - "path/filepath" "strings" config "github.com/go-skynet/LocalAI/api/config" + fiberContext "github.com/go-skynet/LocalAI/api/ctx" options "github.com/go-skynet/LocalAI/api/options" "github.com/go-skynet/LocalAI/api/schema" model 
"github.com/go-skynet/LocalAI/pkg/model" @@ -19,8 +18,7 @@ import ( "github.com/rs/zerolog/log" ) -func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *schema.OpenAIRequest, error) { - loader := o.Loader +func readRequest(c *fiber.Ctx, o *options.Option, firstModel bool) (string, *schema.OpenAIRequest, error) { input := new(schema.OpenAIRequest) ctx, cancel := context.WithCancel(o.Context) input.Context = ctx @@ -30,38 +28,13 @@ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *sche return "", nil, fmt.Errorf("failed parsing request body: %w", err) } - modelFile := input.Model - - if c.Params("model") != "" { - modelFile = c.Params("model") - } - received, _ := json.Marshal(input) log.Debug().Msgf("Request received: %s", string(received)) - // Set model from bearer token, if available - bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ") - bearerExists := bearer != "" && loader.ExistsInModelPath(bearer) + modelFile, err := fiberContext.ModelFromContext(c, o.Loader, input.Model, firstModel) - // If no model was specified, take the first available - if modelFile == "" && !bearerExists && randomModel { - models, _ := loader.ListModels() - if len(models) > 0 { - modelFile = models[0] - log.Debug().Msgf("No model specified, using: %s", modelFile) - } else { - log.Debug().Msgf("No model specified, returning error") - return "", nil, fmt.Errorf("no model specified") - } - } - - // If a model is found in bearer token takes precedence - if bearerExists { - log.Debug().Msgf("Using model from bearer token: %s", bearer) - modelFile = bearer - } - return modelFile, input, nil + return modelFile, input, err } // this function check if the string is an URL, if it's an URL downloads the image in memory @@ -95,7 +68,7 @@ func getBase64Image(s string) (string, error) { return "", fmt.Errorf("not valid string") } -func updateConfig(config *config.Config, input *schema.OpenAIRequest) { +func updateRequestConfig(config *config.Config, input *schema.OpenAIRequest) { if input.Echo { config.Echo = input.Echo } @@ -282,55 +255,11 @@ func updateConfig(config *config.Config, input *schema.OpenAIRequest) { } } -func readConfig(modelFile string, input *schema.OpenAIRequest, cm *config.ConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.Config, *schema.OpenAIRequest, error) { - // Load a config file if present after the model name - modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml") - - var cfg *config.Config - - defaults := func() { - cfg = config.DefaultConfig(modelFile) - cfg.ContextSize = ctx - cfg.Threads = threads - cfg.F16 = f16 - cfg.Debug = debug - } - - cfgExisting, exists := cm.GetConfig(modelFile) - if !exists { - if _, err := os.Stat(modelConfig); err == nil { - if err := cm.LoadConfig(modelConfig); err != nil { - return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error()) - } - cfgExisting, exists = cm.GetConfig(modelFile) - if exists { - cfg = &cfgExisting - } else { - defaults() - } - } else { - defaults() - } - } else { - cfg = &cfgExisting - } +func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.ConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.Config, *schema.OpenAIRequest, error) { + cfg, err := config.Load(modelFile, loader.ModelPath, cm, debug, threads, ctx, f16) // Set the parameters for the language model prediction - updateConfig(cfg, input) + updateRequestConfig(cfg, input) - // 
Don't allow 0 as setting - if cfg.Threads == 0 { - if threads != 0 { - cfg.Threads = threads - } else { - cfg.Threads = 4 - } - } - - // Enforce debug flag if passed from CLI - if debug { - cfg.Debug = true - } - - return cfg, input, nil + return cfg, input, err } diff --git a/api/openai/transcription.go b/api/openai/transcription.go index 895c110f..668a2069 100644 --- a/api/openai/transcription.go +++ b/api/openai/transcription.go @@ -19,12 +19,12 @@ import ( // https://platform.openai.com/docs/api-reference/audio/create func TranscriptEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - m, input, err := readInput(c, o, false) + m, input, err := readRequest(c, o, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - config, input, err := readConfig(m, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16) + config, input, err := mergeRequestWithConfig(m, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } diff --git a/backend/python/vall-e-x/install.sh b/backend/python/vall-e-x/install.sh index 2fe29d19..653eab7f 100644 --- a/backend/python/vall-e-x/install.sh +++ b/backend/python/vall-e-x/install.sh @@ -12,6 +12,9 @@ echo $CONDA_PREFIX git clone https://github.com/Plachtaa/VALL-E-X.git $CONDA_PREFIX/vall-e-x && pushd $CONDA_PREFIX/vall-e-x && git checkout -b build $SHA && pip install -r requirements.txt && popd +# Pin some dependencies (the upstream requirements are too loose) +pip install torchaudio==2.2.0 + cp -rfv $CONDA_PREFIX/vall-e-x/* ./ if [ "$PIP_CACHE_PURGE" = true ] ; then diff --git a/docs/content/docs/features/GPU-acceleration.md b/docs/content/docs/features/GPU-acceleration.md index 9688e787..aa931f07 100644 --- a/docs/content/docs/features/GPU-acceleration.md +++ b/docs/content/docs/features/GPU-acceleration.md @@ -112,14 +112,24 @@ llama_init_from_file: kv self size = 512.00 MB ## Intel acceleration (sycl) -#### Requirements +### Requirements -Requirement: [Intel oneAPI Base Toolkit](https://software.intel.com/content/www/us/en/develop/tools/oneapi/base-toolkit/download.html) +If building from source, you need to install the [Intel oneAPI Base Toolkit](https://software.intel.com/content/www/us/en/develop/tools/oneapi/base-toolkit/download.html) and have the Intel drivers available on the system. + +### Container images To use SYCL, use the images with the `sycl-f16` or `sycl-f32` tag, for example `{{< version >}}-sycl-f32-core`, `{{< version >}}-sycl-f16-ffmpeg-core`, ... The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags). +#### Example + +To run LocalAI with Docker and SYCL, starting the `phi-2` model, you can use the following command as an example: + +```bash +docker run -e DEBUG=true --privileged -ti -v $PWD/models:/build/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-sycl-f32-ffmpeg-core phi-2 +``` + ### Notes In addition to the commands to run LocalAI normally, you need to specify `--device /dev/dri` to docker, for example: @@ -128,3 +138,4 @@ In addition to the commands to run LocalAI normally, you need to specify `--devi docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg-core ``` +Note also that SYCL has a known issue where it hangs when `mmap: true` is set. You have to disable it in the model configuration if it is explicitly enabled; for example, a minimal model config that disables it could look like this (the model name and file below are placeholders):
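+
+```yaml
+# A minimal sketch: the model name and file below are placeholders
+name: my-model
+parameters:
+  model: my-model.gguf
+mmap: false
+```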
diff --git a/examples/kubernetes/deployment-intel-arc.yaml b/examples/kubernetes/deployment-intel-arc.yaml new file mode 100644 index 00000000..f77182bd --- /dev/null +++ b/examples/kubernetes/deployment-intel-arc.yaml @@ -0,0 +1,68 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: local-ai +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: models-pvc + namespace: local-ai +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 20Gi +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: local-ai + namespace: local-ai + labels: + app: local-ai +spec: + selector: + matchLabels: + app: local-ai + replicas: 1 + template: + metadata: + labels: + app: local-ai + name: local-ai + spec: + containers: + - args: + - phi-2 + env: + - name: DEBUG + value: "true" + name: local-ai + image: quay.io/go-skynet/local-ai:master-sycl-f32-ffmpeg-core + imagePullPolicy: Always + resources: + limits: + gpu.intel.com/i915: 1 + volumeMounts: + - name: models-volume + mountPath: /build/models + volumes: + - name: models-volume + persistentVolumeClaim: + claimName: models-pvc +--- +apiVersion: v1 +kind: Service +metadata: + name: local-ai + namespace: local-ai +spec: + selector: + app: local-ai + type: LoadBalancer + ports: + - protocol: TCP + port: 8080 + targetPort: 8080 \ No newline at end of file diff --git a/examples/kubernetes/deployment.yaml b/examples/kubernetes/deployment.yaml new file mode 100644 index 00000000..601fffdb --- /dev/null +++ b/examples/kubernetes/deployment.yaml @@ -0,0 +1,65 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: local-ai +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: models-pvc + namespace: local-ai +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: local-ai + namespace: local-ai + labels: + app: local-ai +spec: + selector: + matchLabels: + app: local-ai + replicas: 1 + template: + metadata: + labels: + app: local-ai + name: local-ai + spec: + containers: + - args: + - phi-2 + env: + - name: DEBUG + value: "true" + name: local-ai + image: quay.io/go-skynet/local-ai:master-ffmpeg-core + imagePullPolicy: IfNotPresent + volumeMounts: + - name: models-volume + mountPath: /build/models + volumes: + - name: models-volume + persistentVolumeClaim: + claimName: models-pvc +--- +apiVersion: v1 +kind: Service +metadata: + name: local-ai + namespace: local-ai +spec: + selector: + app: local-ai + type: LoadBalancer + ports: + - protocol: TCP + port: 8080 + targetPort: 8080 \ No newline at end of file diff --git a/main.go b/main.go index d2209285..edf70328 100644 --- a/main.go +++ b/main.go @@ -404,7 +404,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit defer opts.Loader.StopAllGRPC() - filePath, _, err := backend.ModelTTS(backendOption, text, modelOption, opts.Loader, opts) + filePath, _, err := backend.ModelTTS(backendOption, text, modelOption, opts.Loader, opts, config.Config{}) if err != nil { return err } From 58cdf97361936b11c26d59a65e027f2d008c1113 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 11 Feb 2024 10:01:11 +0100 Subject: [PATCH 0054/2895] :arrow_up: Update ggerganov/llama.cpp (#1694) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index
084e75da..4e1f5190 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=4b7b38bef5addbd31f453871d79647fbae6bec8a +CPPLLAMA_VERSION?=f026f8120f97090d34a52b3dc023c82e0ede3f7d # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From fd68bf708402b3e7206f751e0254ff07df563434 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 11 Feb 2024 11:20:00 +0100 Subject: [PATCH 0055/2895] fix(vall-e-x): Fix voice cloning (#1696) --- backend/python/vall-e-x/ttsvalle.py | 4 ++++ docs/content/docs/features/text-to-audio.md | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/backend/python/vall-e-x/ttsvalle.py b/backend/python/vall-e-x/ttsvalle.py index d7c5d700..fc9d93bd 100644 --- a/backend/python/vall-e-x/ttsvalle.py +++ b/backend/python/vall-e-x/ttsvalle.py @@ -55,6 +55,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): print("Preparing models, please wait", file=sys.stderr) # download and load all models preload_models() + self.clonedVoice = False # Assume directory from request.ModelFile. # Only if request.LoraAdapter it's not an absolute path if request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath): @@ -65,6 +66,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if request.AudioPath != "": print("Generating model", file=sys.stderr) make_prompt(name=model_name, audio_prompt_path=request.AudioPath) + self.clonedVoice = True ### Use given transcript ##make_prompt(name=model_name, audio_prompt_path="paimon_prompt.wav", ## transcript="Just, what was that? Paimon thought we were gonna get eaten.") @@ -91,6 +93,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): try: audio_array = None if model != "": + if self.clonedVoice: + model = os.path.basename(request.model) audio_array = generate_audio(request.text, prompt=model) else: audio_array = generate_audio(request.text) diff --git a/docs/content/docs/features/text-to-audio.md b/docs/content/docs/features/text-to-audio.md index 68dfbaad..57b783ee 100644 --- a/docs/content/docs/features/text-to-audio.md +++ b/docs/content/docs/features/text-to-audio.md @@ -144,15 +144,15 @@ parameters: model: "cloned-voice" vall-e: # The path to the audio file to be cloned - # relative to the models directory - audio_path: "path-to-wav-source.wav" + # relative to the models directory + # Max 15s + audio_path: "audio-sample.wav" ``` Then you can specify the model name in the requests: ``` curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ - "backend": "vall-e-x", "model": "cloned-voice", "input":"Hello!" 
}' | aplay From 6e0eb96c61c41db7d51a5d258d95a3da8565c667 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 11 Feb 2024 11:28:59 +0100 Subject: [PATCH 0056/2895] fix: drop unused code (#1697) Signed-off-by: Ettore Di Giacinto --- backend/go/llm/dolly/main.go | 23 ----------------------- backend/go/llm/gptj/main.go | 23 ----------------------- backend/go/llm/gptneox/main.go | 23 ----------------------- backend/go/llm/mpt/main.go | 23 ----------------------- backend/go/llm/replit/main.go | 23 ----------------------- 5 files changed, 115 deletions(-) delete mode 100644 backend/go/llm/dolly/main.go delete mode 100644 backend/go/llm/gptj/main.go delete mode 100644 backend/go/llm/gptneox/main.go delete mode 100644 backend/go/llm/mpt/main.go delete mode 100644 backend/go/llm/replit/main.go diff --git a/backend/go/llm/dolly/main.go b/backend/go/llm/dolly/main.go deleted file mode 100644 index d00a30f0..00000000 --- a/backend/go/llm/dolly/main.go +++ /dev/null @@ -1,23 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers" - - grpc "github.com/go-skynet/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &transformers.Dolly{}); err != nil { - panic(err) - } -} diff --git a/backend/go/llm/gptj/main.go b/backend/go/llm/gptj/main.go deleted file mode 100644 index 10f16a29..00000000 --- a/backend/go/llm/gptj/main.go +++ /dev/null @@ -1,23 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers" - - grpc "github.com/go-skynet/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &transformers.GPTJ{}); err != nil { - panic(err) - } -} diff --git a/backend/go/llm/gptneox/main.go b/backend/go/llm/gptneox/main.go deleted file mode 100644 index 450cd5da..00000000 --- a/backend/go/llm/gptneox/main.go +++ /dev/null @@ -1,23 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers" - - grpc "github.com/go-skynet/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &transformers.GPTNeoX{}); err != nil { - panic(err) - } -} diff --git a/backend/go/llm/mpt/main.go b/backend/go/llm/mpt/main.go deleted file mode 100644 index 3455078f..00000000 --- a/backend/go/llm/mpt/main.go +++ /dev/null @@ -1,23 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers" - - grpc "github.com/go-skynet/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &transformers.MPT{}); err != nil { - panic(err) - } -} diff --git a/backend/go/llm/replit/main.go b/backend/go/llm/replit/main.go deleted file mode 100644 index 
df605149..00000000 --- a/backend/go/llm/replit/main.go +++ /dev/null @@ -1,23 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers" - - grpc "github.com/go-skynet/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &transformers.Replit{}); err != nil { - panic(err) - } -} From 4436e62cf1c9655e6f71664405aae6b88d4e1722 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 12 Feb 2024 09:56:04 +0100 Subject: [PATCH 0057/2895] :arrow_up: Update ggerganov/llama.cpp (#1698) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4e1f5190..28a0cbc7 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=f026f8120f97090d34a52b3dc023c82e0ede3f7d +CPPLLAMA_VERSION?=3bdc4cd0f595a6096cca4a64aa75ffa8a3503465 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 02f6e18adc06a62ad9895be56bebdcdfa6711e92 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 12 Feb 2024 22:43:33 +0100 Subject: [PATCH 0058/2895] :arrow_up: Update ggerganov/llama.cpp (#1700) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 28a0cbc7..fc3bab6a 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=3bdc4cd0f595a6096cca4a64aa75ffa8a3503465 +CPPLLAMA_VERSION?=099afc6274c859ca67146e725839f2d97a5ef313 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 2e61ff32adf249955225ac3e50842c35ac17beea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Serta=C3=A7=20=C3=96zercan?= <852750+sozercan@users.noreply.github.com> Date: Tue, 13 Feb 2024 00:35:39 -0800 Subject: [PATCH 0059/2895] ci: add cuda builds to release (#1702) Signed-off-by: Sertac Ozercan --- .github/workflows/release.yaml | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 6c66138c..82745225 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -20,6 +20,10 @@ jobs: defines: '-DLLAMA_AVX2=OFF' - build: 'avx512' defines: '-DLLAMA_AVX512=ON' + - build: 'cuda12' + defines: '' + - build: 'cuda11' + defines: '' runs-on: ubuntu-latest steps: - name: Clone @@ -33,7 +37,18 @@ jobs: run: | sudo apt-get update sudo apt-get install build-essential ffmpeg - + - name: Install CUDA Dependencies + if: ${{ matrix.build == 'cuda12' || matrix.build == 'cuda11' }} + run: | + if [ "${{ matrix.build }}" == "cuda12" ]; then + export CUDA_VERSION=12-3 + else + export CUDA_VERSION=11-7 + fi + curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb + sudo dpkg -i cuda-keyring_1.1-1_all.deb + sudo apt-get update + sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION} - name: Cache grpc id: cache-grpc 
uses: actions/cache@v3 @@ -50,14 +65,19 @@ jobs: - name: Install gRPC run: | cd grpc && cd cmake/build && sudo make -j12 install - - name: Build id: build env: CMAKE_ARGS: "${{ matrix.defines }}" BUILD_ID: "${{ matrix.build }}" run: | - STATIC=true make dist + if [ "${{ matrix.build }}" == "cuda12" ] || [ "${{ matrix.build }}" == "cuda11" ]; then + export BUILD_TYPE=cublas + export PATH=/usr/local/cuda/bin:$PATH + make dist + else + STATIC=true make dist + fi - uses: actions/upload-artifact@v3 with: name: ${{ matrix.build }} @@ -109,4 +129,4 @@ jobs: if: startsWith(github.ref, 'refs/tags/') with: files: | - release/* \ No newline at end of file + release/* From c56b6ddb1cee8b8b2a19ddeb9efdb464e1789f2e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 13 Feb 2024 21:17:21 +0100 Subject: [PATCH 0060/2895] fix(llama.cpp): disable infinite context shifting (#1704) Infinite context support can trigger an infinite loop of context shifting if the model hallucinates and does not stop answering. This has the unpleasant effect that the prediction never terminates, which happens especially with small models, which tend to hallucinate. Works around https://github.com/mudler/LocalAI/issues/1333 by removing context shifting. See also upstream issue: https://github.com/ggerganov/llama.cpp/issues/3969 --- backend/cpp/llama/grpc-server.cpp | 36 +++++++++++-------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 35ca6ea5..954e472a 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -1387,30 +1387,20 @@ struct llama_server_context { if (slot.is_processing() && system_tokens.size() + slot.cache_tokens.size() >= (size_t) slot.n_ctx) { - // Shift context - const int n_left = system_tokens.size() + slot.n_past - slot.params.n_keep - 1; - const int n_discard = n_left / 2; + // START LOCALAI changes + // Temporarily disable context-shifting as it can lead to infinite loops (issue: https://github.com/ggerganov/llama.cpp/issues/3969) + // See: https://github.com/mudler/LocalAI/issues/1333 + // Context is exhausted, release the slot + slot.release(); + send_final_response(slot); + slot.cache_tokens.clear(); + slot.n_past = 0; + slot.truncated = false; + slot.has_next_token = true; + LOG_TEE("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size()); - LOG_TEE("slot %d: context shift - n_keep = %d, n_left = %d, n_discard = %d\n", slot.id, slot.params.n_keep, n_left, n_discard); - llama_kv_cache_seq_rm (ctx, slot.id, slot.params.n_keep + 1 , slot.params.n_keep + n_discard + 1); - llama_kv_cache_seq_shift(ctx, slot.id, slot.params.n_keep + 1 + n_discard, system_tokens.size() + slot.n_past, -n_discard); - - for (size_t i = slot.params.n_keep + 1 + n_discard; i < slot.cache_tokens.size(); i++) - { - slot.cache_tokens[i - n_discard] = slot.cache_tokens[i]; - } - - slot.cache_tokens.resize(slot.cache_tokens.size() - n_discard); - - slot.n_past -= n_discard; - - slot.truncated = true; - - LOG_VERBOSE("context shift", { - { "n_ctx", n_ctx }, - { "n_keep", params.n_keep }, - { "n_left", n_left }, - }); + continue; + // END LOCALAI changes } } } From 39a6b562cfb1a8c488fc2560e4f32abebfa13d8e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 14 Feb 2024 10:28:06 +0100 Subject: [PATCH 0061/2895] fix(llama.cpp): downgrade to a known working version (#1706) SYCL support is broken otherwise.
See upstream issue: https://github.com/ggerganov/llama.cpp/issues/5469 Signed-off-by: Ettore Di Giacinto --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index fc3bab6a..306d60c4 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=099afc6274c859ca67146e725839f2d97a5ef313 +CPPLLAMA_VERSION?=f026f8120f97090d34a52b3dc023c82e0ede3f7d # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all @@ -550,4 +550,4 @@ docker-image-intel: --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="none" \ - --build-arg BUILD_TYPE=sycl_f16 -t $(DOCKER_IMAGE) . \ No newline at end of file + --build-arg BUILD_TYPE=sycl_f16 -t $(DOCKER_IMAGE) . From 5e155fb081bbf41d8f0a5061201089dd5a7a9041 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 14 Feb 2024 21:44:12 +0100 Subject: [PATCH 0062/2895] fix(python): pin exllama2 (#1711) fix(python): pin python deps Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- backend/python/diffusers/Makefile | 7 ++++--- backend/python/diffusers/install.sh | 24 ++++++++++++++++++++++++ backend/python/exllama2/install.sh | 14 ++++++++++++-- backend/python/mamba/install.sh | 2 +- backend/python/vall-e-x/install.sh | 5 +---- 6 files changed, 43 insertions(+), 11 deletions(-) create mode 100755 backend/python/diffusers/install.sh mode change 100644 => 100755 backend/python/mamba/install.sh diff --git a/Makefile b/Makefile index 306d60c4..c63d46f8 100644 --- a/Makefile +++ b/Makefile @@ -550,4 +550,4 @@ docker-image-intel: --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="none" \ - --build-arg BUILD_TYPE=sycl_f16 -t $(DOCKER_IMAGE) . + --build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) . diff --git a/backend/python/diffusers/Makefile b/backend/python/diffusers/Makefile index f3f9d4e2..4ec03c71 100644 --- a/backend/python/diffusers/Makefile +++ b/backend/python/diffusers/Makefile @@ -1,8 +1,9 @@ +CONDA_ENV_PATH = "diffusers.yml" + .PHONY: diffusers diffusers: - @echo "Creating virtual environment..." - @conda env create --name diffusers --file diffusers.yml - @echo "Virtual environment created." + @echo "Installing $(CONDA_ENV_PATH)..." + bash install.sh $(CONDA_ENV_PATH) .PHONY: run run: diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh new file mode 100755 index 00000000..0429826e --- /dev/null +++ b/backend/python/diffusers/install.sh @@ -0,0 +1,24 @@ +#!/bin/bash +set -ex + +# Check if environment exist +conda_env_exists(){ + ! conda list --name "${@}" >/dev/null 2>/dev/null +} + +if conda_env_exists "diffusers" ; then + echo "Creating virtual environment..." + conda env create --name diffusers --file $1 + echo "Virtual environment created." +else + echo "Virtual environment already exists." 
+fi + +if [ "$PIP_CACHE_PURGE" = true ] ; then + export PATH=$PATH:/opt/conda/bin + + # Activate conda environment + source activate diffusers + + pip cache purge +fi \ No newline at end of file diff --git a/backend/python/exllama2/install.sh b/backend/python/exllama2/install.sh index 44d45364..a6df3d37 100755 --- a/backend/python/exllama2/install.sh +++ b/backend/python/exllama2/install.sh @@ -1,15 +1,25 @@ #!/bin/bash - +set -e ## ## A bash script installs the required dependencies of VALL-E-X and prepares the environment export PATH=$PATH:/opt/conda/bin +export SHA=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f # Activate conda environment source activate transformers echo $CONDA_PREFIX -git clone https://github.com/turboderp/exllamav2 $CONDA_PREFIX/exllamav2 && pushd $CONDA_PREFIX/exllamav2 && pip install -r requirements.txt && popd +git clone https://github.com/turboderp/exllamav2 $CONDA_PREFIX/exllamav2 + +pushd $CONDA_PREFIX/exllamav2 + +git checkout -b build $SHA + +# TODO: this needs to be pinned within the conda environments +pip install -r requirements.txt + +popd cp -rfv $CONDA_PREFIX/exllamav2/* ./ diff --git a/backend/python/mamba/install.sh b/backend/python/mamba/install.sh old mode 100644 new mode 100755 index b69e22e7..e56b83c2 --- a/backend/python/mamba/install.sh +++ b/backend/python/mamba/install.sh @@ -1,5 +1,5 @@ #!/bin/bash - +set -e ## ## A bash script installs the required dependencies of VALL-E-X and prepares the environment export PATH=$PATH:/opt/conda/bin diff --git a/backend/python/vall-e-x/install.sh b/backend/python/vall-e-x/install.sh index 653eab7f..26ccdccd 100644 --- a/backend/python/vall-e-x/install.sh +++ b/backend/python/vall-e-x/install.sh @@ -10,10 +10,7 @@ source activate transformers echo $CONDA_PREFIX -git clone https://github.com/Plachtaa/VALL-E-X.git $CONDA_PREFIX/vall-e-x && pushd $CONDA_PREFIX/vall-e-x && git checkout -b build $SHA && pip install -r requirements.txt && popd - -# Pin some dependencies (the upstream requirements are too loose) -pip install torchaudio==2.2.0 +git clone https://github.com/Plachtaa/VALL-E-X.git $CONDA_PREFIX/vall-e-x && pushd $CONDA_PREFIX/vall-e-x && git checkout -b build $SHA && popd cp -rfv $CONDA_PREFIX/vall-e-x/* ./ From e690bf387a27de277368e2f742a616e1b2600d5b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 15 Feb 2024 17:33:06 +0100 Subject: [PATCH 0063/2895] fix(tts): fix regression when supplying backend from requests (#1713) fixes #1707 Signed-off-by: Ettore Di Giacinto --- api/localai/localai.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/localai/localai.go b/api/localai/localai.go index 7774ca47..3abe440e 100644 --- a/api/localai/localai.go +++ b/api/localai/localai.go @@ -41,7 +41,7 @@ func TTSEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) log.Debug().Msgf("Request for model: %s", modelFile) if input.Backend != "" { - cfg.Backend = input.Input + cfg.Backend = input.Backend } filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, o.Loader, o, *cfg) From fb0a4c5d9a1fa425bb1c61e354faf26efa41154a Mon Sep 17 00:00:00 2001 From: fenfir Date: Fri, 16 Feb 2024 15:08:50 +0100 Subject: [PATCH 0064/2895] Build docker container for ROCm (#1595) * Dockerfile changes to build for ROCm * Adjust linker flags for ROCm * Update conda env for diffusers and transformers to use ROCm pytorch * Update transformers conda env for ROCm * ci: build hipblas images * fixup rebase * use self-hosted Signed-off-by: mudler * specify LD_LIBRARY_PATH only
when BUILD_TYPE=hipblas --------- Signed-off-by: mudler Co-authored-by: mudler --- .github/workflows/image-pr.yml | 8 ++ .github/workflows/image.yml | 32 +++++ Dockerfile | 7 +- Makefile | 4 +- .../python/common-env/transformers/Makefile | 4 + .../transformers/transformers-rocm.yml | 109 ++++++++++++++++++ backend/python/diffusers/Makefile | 8 +- backend/python/diffusers/diffusers-rocm.yml | 64 ++++++++++ backend/python/diffusers/diffusers.yml | 2 +- 9 files changed, 233 insertions(+), 5 deletions(-) create mode 100644 backend/python/common-env/transformers/transformers-rocm.yml create mode 100644 backend/python/diffusers/diffusers-rocm.yml diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index ae8bd070..527a8479 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -51,6 +51,14 @@ jobs: image-type: 'extras' runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" + - build-type: 'hipblas' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-hipblas' + ffmpeg: 'false' + image-type: 'extras' + base-image: "rocm/dev-ubuntu-22.04:6.0-complete" + runs-on: 'arc-runner-set' core-image-build: uses: ./.github/workflows/image_build.yml with: diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index ac61deec..830528a1 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -103,6 +103,22 @@ jobs: image-type: 'extras' base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' + - build-type: 'hipblas' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-hipblas-ffmpeg' + ffmpeg: 'true' + image-type: 'extras' + base-image: "rocm/dev-ubuntu-22.04:6.0-complete" + runs-on: 'arc-runner-set' + - build-type: 'hipblas' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-hipblas' + ffmpeg: 'false' + image-type: 'extras' + base-image: "rocm/dev-ubuntu-22.04:6.0-complete" + runs-on: 'arc-runner-set' core-image-build: uses: ./.github/workflows/image_build.yml with: @@ -124,6 +140,22 @@ jobs: strategy: matrix: include: + - build-type: 'hipblas' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-hipblas-ffmpeg-core' + ffmpeg: 'true' + image-type: 'core' + base-image: "rocm/dev-ubuntu-22.04:6.0-complete" + runs-on: 'arc-runner-set' + - build-type: 'hipblas' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-hipblas-core' + ffmpeg: 'false' + image-type: 'core' + base-image: "rocm/dev-ubuntu-22.04:6.0-complete" + runs-on: 'arc-runner-set' - build-type: '' platforms: 'linux/amd64' tag-latest: 'false' diff --git a/Dockerfile b/Dockerfile index 6c5e2745..a04a866e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,3 @@ -ARG GO_VERSION=1.21 ARG IMAGE_TYPE=extras ARG BASE_IMAGE=ubuntu:22.04 @@ -42,8 +41,12 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \ ; fi +# Cuda ENV PATH /usr/local/cuda/bin:${PATH} +# HipBLAS requirements +ENV PATH /opt/rocm/bin:${PATH} + # OpenBLAS requirements and stable diffusion RUN apt-get install -y \ libopenblas-dev \ @@ -70,7 +73,9 @@ RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmo apt-get install -y conda && apt-get clean ENV PATH="/root/.cargo/bin:${PATH}" +RUN apt-get install -y 
python3-pip && apt-get clean RUN pip install --upgrade pip + RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y RUN apt-get install -y espeak-ng espeak && apt-get clean diff --git a/Makefile b/Makefile index c63d46f8..31434e50 100644 --- a/Makefile +++ b/Makefile @@ -97,6 +97,8 @@ endif ifeq ($(BUILD_TYPE),hipblas) ROCM_HOME ?= /opt/rocm + ROCM_PATH ?= /opt/rocm + LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib export CXX=$(ROCM_HOME)/llvm/bin/clang++ export CC=$(ROCM_HOME)/llvm/bin/clang # llama-ggml has no hipblas support, so override it here. @@ -105,7 +107,7 @@ ifeq ($(BUILD_TYPE),hipblas) GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100 AMDGPU_TARGETS ?= "$(GPU_TARGETS)" CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)" - CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link + CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib endif ifeq ($(BUILD_TYPE),metal) diff --git a/backend/python/common-env/transformers/Makefile b/backend/python/common-env/transformers/Makefile index 47a5ba25..1cd71ab1 100644 --- a/backend/python/common-env/transformers/Makefile +++ b/backend/python/common-env/transformers/Makefile @@ -4,6 +4,10 @@ ifeq ($(BUILD_TYPE), cublas) CONDA_ENV_PATH = "transformers-nvidia.yml" endif +ifeq ($(BUILD_TYPE), hipblas) + CONDA_ENV_PATH = "transformers-rocm.yml" +endif + .PHONY: transformers transformers: @echo "Installing $(CONDA_ENV_PATH)..." diff --git a/backend/python/common-env/transformers/transformers-rocm.yml b/backend/python/common-env/transformers/transformers-rocm.yml new file mode 100644 index 00000000..1f5d2236 --- /dev/null +++ b/backend/python/common-env/transformers/transformers-rocm.yml @@ -0,0 +1,109 @@ +name: transformers +channels: + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - bzip2=1.0.8=h7b6447c_0 + - ca-certificates=2023.08.22=h06a4308_0 + - ld_impl_linux-64=2.38=h1181459_1 + - libffi=3.4.4=h6a678d5_0 + - libgcc-ng=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libstdcxx-ng=11.2.0=h1234567_1 + - libuuid=1.41.5=h5eee18b_0 + - ncurses=6.4=h6a678d5_0 + - openssl=3.0.11=h7f8727e_2 + - pip=23.2.1=py311h06a4308_0 + - python=3.11.5=h955ad1f_0 + - readline=8.2=h5eee18b_0 + - setuptools=68.0.0=py311h06a4308_0 + - sqlite=3.41.2=h5eee18b_0 + - tk=8.6.12=h1ccaba5_0 + - wheel=0.41.2=py311h06a4308_0 + - xz=5.4.2=h5eee18b_0 + - zlib=1.2.13=h5eee18b_0 + - pip: + - --pre + - --extra-index-url https://download.pytorch.org/whl/nightly/ + - accelerate==0.23.0 + - aiohttp==3.8.5 + - aiosignal==1.3.1 + - async-timeout==4.0.3 + - attrs==23.1.0 + - bark==0.1.5 + - boto3==1.28.61 + - botocore==1.31.61 + - certifi==2023.7.22 + - TTS==0.22.0 + - charset-normalizer==3.3.0 + - datasets==2.14.5 + - sentence-transformers==2.2.2 + - sentencepiece==0.1.99 + - dill==0.3.7 + - einops==0.7.0 + - encodec==0.1.1 + - filelock==3.12.4 + - frozenlist==1.4.0 + - fsspec==2023.6.0 + - funcy==2.0 + - grpcio==1.59.0 + - huggingface-hub + - idna==3.4 + - jinja2==3.1.2 + - jmespath==1.0.1 + - markupsafe==2.1.3 + - mpmath==1.3.0 + - multidict==6.0.4 + - multiprocess==0.70.15 + - networkx + - numpy==1.26.0 + - packaging==23.2 + - pandas + - peft==0.5.0 + - protobuf==4.24.4 + - psutil==5.9.5 + - pyarrow==13.0.0 + - python-dateutil==2.8.2 + - pytz==2023.3.post1 + - pyyaml==6.0.1 + - regex==2023.10.3 + - requests==2.31.0 + - rouge==1.0.1 + - s3transfer==0.7.0 + - safetensors==0.3.3 
+ - scipy==1.11.3 + - six==1.16.0 + - sympy==1.12 + - tokenizers + - torch + - torchaudio + - tqdm==4.66.1 + - triton==2.1.0 + - typing-extensions==4.8.0 + - tzdata==2023.3 + - auto-gptq==0.6.0 + - urllib3==1.26.17 + - xxhash==3.4.1 + - yarl==1.9.2 + - soundfile + - langid + - wget + - unidecode + - pyopenjtalk-prebuilt + - pypinyin + - inflect + - cn2an + - jieba + - eng_to_ipa + - openai-whisper + - matplotlib + - gradio==3.41.2 + - nltk + - sudachipy + - sudachidict_core + - vocos + - vllm==0.2.7 + - transformers>=4.36.0 # Required for Mixtral. + - xformers==0.0.23.post1 +prefix: /opt/conda/envs/transformers diff --git a/backend/python/diffusers/Makefile b/backend/python/diffusers/Makefile index 4ec03c71..70a62b60 100644 --- a/backend/python/diffusers/Makefile +++ b/backend/python/diffusers/Makefile @@ -1,4 +1,8 @@ -CONDA_ENV_PATH = "diffusers.yml" +export CONDA_ENV_PATH = "diffusers.yml" + +ifeq ($(BUILD_TYPE), hipblas) +export CONDA_ENV_PATH = "diffusers-rocm.yml" +endif .PHONY: diffusers diffusers: @@ -12,4 +16,4 @@ run: @echo "Diffusers run." test: - bash test.sh \ No newline at end of file + bash test.sh diff --git a/backend/python/diffusers/diffusers-rocm.yml b/backend/python/diffusers/diffusers-rocm.yml new file mode 100644 index 00000000..f261701d --- /dev/null +++ b/backend/python/diffusers/diffusers-rocm.yml @@ -0,0 +1,64 @@ +name: diffusers +channels: + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - bzip2=1.0.8=h7b6447c_0 + - ca-certificates=2023.08.22=h06a4308_0 + - ld_impl_linux-64=2.38=h1181459_1 + - libffi=3.4.4=h6a678d5_0 + - libgcc-ng=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libstdcxx-ng=11.2.0=h1234567_1 + - libuuid=1.41.5=h5eee18b_0 + - ncurses=6.4=h6a678d5_0 + - openssl=3.0.11=h7f8727e_2 + - pip=23.2.1=py311h06a4308_0 + - python=3.11.5=h955ad1f_0 + - readline=8.2=h5eee18b_0 + - setuptools=68.0.0=py311h06a4308_0 + - sqlite=3.41.2=h5eee18b_0 + - tk=8.6.12=h1ccaba5_0 + - tzdata=2023c=h04d1e81_0 + - wheel=0.41.2=py311h06a4308_0 + - xz=5.4.2=h5eee18b_0 + - zlib=1.2.13=h5eee18b_0 + - pip: + - --pre + - --extra-index-url https://download.pytorch.org/whl/nightly/ + - accelerate>=0.11.0 + - certifi==2023.7.22 + - charset-normalizer==3.3.0 + - compel==2.0.2 + - diffusers==0.24.0 + - filelock==3.12.4 + - fsspec==2023.9.2 + - grpcio==1.59.0 + - huggingface-hub>=0.19.4 + - idna==3.4 + - importlib-metadata==6.8.0 + - jinja2==3.1.2 + - markupsafe==2.1.3 + - mpmath==1.3.0 + - networkx==3.1 + - numpy==1.26.0 + - omegaconf + - packaging==23.2 + - pillow==10.0.1 + - protobuf==4.24.4 + - psutil==5.9.5 + - pyparsing==3.1.1 + - pyyaml==6.0.1 + - regex==2023.10.3 + - requests==2.31.0 + - safetensors==0.4.0 + - sympy==1.12 + - tqdm==4.66.1 + - transformers>=4.25.1 + - triton==2.1.0 + - typing-extensions==4.8.0 + - urllib3==2.0.6 + - zipp==3.17.0 + - torch +prefix: /opt/conda/envs/diffusers diff --git a/backend/python/diffusers/diffusers.yml b/backend/python/diffusers/diffusers.yml index a37f41d9..b1a7d9f9 100644 --- a/backend/python/diffusers/diffusers.yml +++ b/backend/python/diffusers/diffusers.yml @@ -71,4 +71,4 @@ dependencies: - typing-extensions==4.8.0 - urllib3==2.0.6 - zipp==3.17.0 -prefix: /opt/conda/envs/diffusers \ No newline at end of file +prefix: /opt/conda/envs/diffusers From 2151d218621079afa21ae85a799d0c2fd3e0d633 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 16 Feb 2024 15:11:53 +0100 Subject: [PATCH 0065/2895] :arrow_up: Update docs version mudler/LocalAI (#1718) 
* :arrow_up: Update docs version mudler/LocalAI Signed-off-by: GitHub * Update docs/data/version.json Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: GitHub Signed-off-by: Ettore Di Giacinto Co-authored-by: mudler --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index f5a5a75c..890f6c35 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.8.0" + "version": "v2.8.2" } From 6b539a2972e1e2404452e46d00447ac27247fda3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 16 Feb 2024 15:22:35 +0100 Subject: [PATCH 0066/2895] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fa875e5a..af47a5e9 100644 --- a/README.md +++ b/README.md @@ -43,20 +43,21 @@ [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) +- ROCm container images: https://github.com/mudler/LocalAI/pull/1595 - Intel GPU support (sycl): https://github.com/mudler/LocalAI/issues/1653 - Deprecation of old backends: https://github.com/mudler/LocalAI/issues/1651 - Mamba support: https://github.com/mudler/LocalAI/pull/1589 - Start and share models with config file: https://github.com/mudler/LocalAI/pull/1522 - 🐸 Coqui: https://github.com/mudler/LocalAI/pull/1489 -- Inline templates: https://github.com/mudler/LocalAI/pull/1452 -- Mixtral: https://github.com/mudler/LocalAI/pull/1449 - Img2vid https://github.com/mudler/LocalAI/pull/1442 -- Musicgen https://github.com/mudler/LocalAI/pull/1387 Hot topics (looking for contributors): - Backends v2: https://github.com/mudler/LocalAI/issues/1126 - Improving UX v2: https://github.com/mudler/LocalAI/issues/1373 - +- Tools support: https://github.com/mudler/LocalAI/pull/1715 +- LLaVa 1.6: https://github.com/mudler/LocalAI/pull/1714 +- Upload file API: https://github.com/mudler/LocalAI/pull/1703 + If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22 ## 💻 [Getting started](https://localai.io/basics/getting_started/index.html) From c72808f18b7ea9811557b201e82e88a54b2abc61 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 17 Feb 2024 10:00:34 +0100 Subject: [PATCH 0067/2895] feat(tools): support Tool calls in the API (#1715) * feat(tools): support Tools in the API Co-authored-by: =?UTF-8?q?Stephan=20A=C3=9Fmus?= * feat(tools): support function streaming * Adhere to new return types when using tools instead of functions * Keep backward compatibility with function calling * Evaluate function names in chat templates * Disable recovery with --debug * Correctly stream out the entire result * Detect when llm chooses to reply and to not perform any action in SSE * Feedback from code review --------- Co-authored-by: =?UTF-8?q?Stephan=20A=C3=9Fmus?= --- api/api.go | 6 +- api/openai/chat.go | 331 ++++++++++++++++++++++++++++----------- api/openai/request.go | 15 ++ api/schema/openai.go | 21 +++ pkg/grammar/functions.go | 6 + pkg/model/loader.go | 1 + 6 files changed, 287 insertions(+), 93 deletions(-) diff --git a/api/api.go b/api/api.go index 7ec95f1b..946204d2 100644 --- a/api/api.go +++ b/api/api.go @@ -146,7 +146,11 @@ func App(opts ...options.AppOption) (*fiber.App, error) { } // Default middleware config - app.Use(recover.New()) + + if !options.Debug { + app.Use(recover.New()) + } + if 
options.Metrics != nil { app.Use(metrics.APIMiddleware(options.Metrics)) } diff --git a/api/openai/chat.go b/api/openai/chat.go index 819cd6b2..68c3a291 100644 --- a/api/openai/chat.go +++ b/api/openai/chat.go @@ -55,6 +55,98 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) }) close(responses) } + processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { + result := "" + _, tokenUsage, _ := ComputeChoices(req, prompt, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { + result += s + // TODO: Change generated BNF grammar to be compliant with the schema so we can + // stream the result token by token here. + return true + }) + + ss := map[string]interface{}{} + name, args := parseFunctionCall(result) + ss["name"], ss["arguments"] = name, args + + if name == noAction { + initialMessage := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}}, + Object: "chat.completion.chunk", + } + responses <- initialMessage + + result, err := handleQuestion(config, req, o, args, prompt) + if err != nil { + log.Error().Msgf("error handling question: %s", err.Error()) + return + } + + resp := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}}, + Object: "chat.completion.chunk", + Usage: schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + }, + } + + responses <- resp + close(responses) + return + } + + initialMessage := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{ + Delta: &schema.Message{ + Role: "assistant", + ToolCalls: []schema.ToolCall{ + { + Index: 0, + ID: id, + Type: "function", + FunctionCall: schema.FunctionCall{ + Name: name, + }, + }, + }, + }}}, + Object: "chat.completion.chunk", + } + responses <- initialMessage + + responses <- schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{ + Delta: &schema.Message{ + Role: "assistant", + ToolCalls: []schema.ToolCall{ + { + Index: 0, + ID: id, + Type: "function", + FunctionCall: schema.FunctionCall{ + Arguments: args, + }, + }, + }, + }}}, + Object: "chat.completion.chunk", + } + close(responses) + } + return func(c *fiber.Ctx) error { processFunctions := false funcs := grammar.Functions{} @@ -122,7 +214,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) } // functions are not supported in stream mode (yet?) 
- toStream := input.Stream && !processFunctions + toStream := input.Stream log.Debug().Msgf("Parameters: %+v", config) @@ -145,6 +237,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) } r := config.Roles[role] contentExists := i.Content != nil && i.StringContent != "" + // First attempt to populate content via a chat message specific template if config.TemplateConfig.ChatMessage != "" { chatMessageData := model.ChatMessageTemplateData{ @@ -152,6 +245,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) Role: r, RoleName: role, Content: i.StringContent, + FunctionName: i.Name, MessageIndex: messageIndex, } templatedChatMessage, err := o.Loader.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData) @@ -254,17 +348,24 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) log.Debug().Msgf("Grammar: %+v", config.Grammar) } - if toStream { + switch { + case toStream: responses := make(chan schema.OpenAIResponse) - go process(predInput, input, config, o.Loader, responses) + if !processFunctions { + go process(predInput, input, config, o.Loader, responses) + } else { + go processTools(noActionName, predInput, input, config, o.Loader, responses) + } c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { - usage := &schema.OpenAIUsage{} - + toolsCalled := false for ev := range responses { usage = &ev.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it + if len(ev.Choices[0].Delta.ToolCalls) > 0 { + toolsCalled = true + } var buf bytes.Buffer enc := json.NewEncoder(&buf) enc.Encode(ev) @@ -278,13 +379,20 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) w.Flush() } + finishReason := "stop" + if toolsCalled { + finishReason = "tool_calls" + } else if toolsCalled && len(input.Tools) == 0 { + finishReason = "function_call" + } + resp := &schema.OpenAIResponse{ ID: id, Created: created, Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: []schema.Choice{ { - FinishReason: "stop", + FinishReason: finishReason, Index: 0, Delta: &schema.Message{Content: &emptyMessage}, }}, @@ -298,102 +406,141 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) w.Flush() })) return nil - } - result, tokenUsage, err := ComputeChoices(input, predInput, config, o, o.Loader, func(s string, c *[]schema.Choice) { - if processFunctions { - // As we have to change the result before processing, we can't stream the answer (yet?) 
- ss := map[string]interface{}{} - // This prevent newlines to break JSON parsing for clients - s = utils.EscapeNewLines(s) - json.Unmarshal([]byte(s), &ss) - log.Debug().Msgf("Function return: %s %+v", s, ss) + default: + result, tokenUsage, err := ComputeChoices(input, predInput, config, o, o.Loader, func(s string, c *[]schema.Choice) { + if processFunctions { + ss := map[string]interface{}{} - // The grammar defines the function name as "function", while OpenAI returns "name" - func_name := ss["function"] - // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object - args := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix) - d, _ := json.Marshal(args) + name, args := parseFunctionCall(s) + ss["name"], ss["arguments"] = name, args - ss["arguments"] = string(d) - ss["name"] = func_name - - // if do nothing, reply with a message - if func_name == noActionName { - log.Debug().Msgf("nothing to do, computing a reply") - - // If there is a message that the LLM already sends as part of the JSON reply, use it - arguments := map[string]interface{}{} - json.Unmarshal([]byte(d), &arguments) - m, exists := arguments["message"] - if exists { - switch message := m.(type) { - case string: - if message != "" { - log.Debug().Msgf("Reply received from LLM: %s", message) - message = backend.Finetune(*config, predInput, message) - log.Debug().Msgf("Reply received from LLM(finetuned): %s", message) - - *c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &message}}) - return - } + // if do nothing, reply with a message + if name == noActionName { + result, err := handleQuestion(config, input, o, args, predInput) + if err != nil { + log.Error().Msgf("error handling question: %s", err.Error()) + return + } + *c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &result}}) + } else { + if len(input.Tools) > 0 { + // Result is different in the case we have a tool call + *c = append(*c, schema.Choice{ + FinishReason: "tool_calls", + Message: &schema.Message{ + Role: "assistant", + ToolCalls: []schema.ToolCall{ + { + ID: id, + Type: "function", + FunctionCall: schema.FunctionCall{ + Name: name, + Arguments: args, + }, + }, + }, + }, + }) + } else { + // otherwise reply with the function call + *c = append(*c, schema.Choice{ + FinishReason: "function_call", + Message: &schema.Message{ + Role: "assistant", + FunctionCall: ss, + }, + }) } } - log.Debug().Msgf("No action received from LLM, without a message, computing a reply") - // Otherwise ask the LLM to understand the JSON output and the context, and return a message - // Note: This costs (in term of CPU) another computation - config.Grammar = "" - images := []string{} - for _, m := range input.Messages { - images = append(images, m.StringImages...) 
- } - predFunc, err := backend.ModelInference(input.Context, predInput, images, o.Loader, *config, o, nil) - if err != nil { - log.Error().Msgf("inference error: %s", err.Error()) - return - } - - prediction, err := predFunc() - if err != nil { - log.Error().Msgf("inference error: %s", err.Error()) - return - } - - fineTunedResponse := backend.Finetune(*config, predInput, prediction.Response) - *c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &fineTunedResponse}}) - } else { - // otherwise reply with the function call - *c = append(*c, schema.Choice{ - FinishReason: "function_call", - Message: &schema.Message{Role: "assistant", FunctionCall: ss}, - }) + return } - return + *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}}) + }, nil) + if err != nil { + return err } - *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}}) - }, nil) - if err != nil { - return err + + resp := &schema.OpenAIResponse{ + ID: id, + Created: created, + Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: result, + Object: "chat.completion", + Usage: schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + }, + } + respData, _ := json.Marshal(resp) + log.Debug().Msgf("Response: %s", respData) + + // Return the prediction in the response body + return c.JSON(resp) } - resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: result, - Object: "chat.completion", - Usage: schema.OpenAIUsage{ - PromptTokens: tokenUsage.Prompt, - CompletionTokens: tokenUsage.Completion, - TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, - }, - } - respData, _ := json.Marshal(resp) - log.Debug().Msgf("Response: %s", respData) - - // Return the prediction in the response body - return c.JSON(resp) } } + +func handleQuestion(config *config.Config, input *schema.OpenAIRequest, o *options.Option, args, prompt string) (string, error) { + log.Debug().Msgf("nothing to do, computing a reply") + + // If there is a message that the LLM already sends as part of the JSON reply, use it + arguments := map[string]interface{}{} + json.Unmarshal([]byte(args), &arguments) + m, exists := arguments["message"] + if exists { + switch message := m.(type) { + case string: + if message != "" { + log.Debug().Msgf("Reply received from LLM: %s", message) + message = backend.Finetune(*config, prompt, message) + log.Debug().Msgf("Reply received from LLM(finetuned): %s", message) + + return message, nil + } + } + } + + log.Debug().Msgf("No action received from LLM, without a message, computing a reply") + // Otherwise ask the LLM to understand the JSON output and the context, and return a message + // Note: This costs (in term of CPU/GPU) another computation + config.Grammar = "" + images := []string{} + for _, m := range input.Messages { + images = append(images, m.StringImages...) 
+ } + + predFunc, err := backend.ModelInference(input.Context, prompt, images, o.Loader, *config, o, nil) + if err != nil { + log.Error().Msgf("inference error: %s", err.Error()) + return "", err + } + + prediction, err := predFunc() + if err != nil { + log.Error().Msgf("inference error: %s", err.Error()) + return "", err + } + return backend.Finetune(*config, prompt, prediction.Response), nil +} + +func parseFunctionCall(llmresult string) (string, string) { + // As we have to change the result before processing, we can't stream the answer token-by-token (yet?) + ss := map[string]interface{}{} + // This prevent newlines to break JSON parsing for clients + s := utils.EscapeNewLines(llmresult) + json.Unmarshal([]byte(s), &ss) + log.Debug().Msgf("Function return: %s %+v", s, ss) + + // The grammar defines the function name as "function", while OpenAI returns "name" + func_name := ss["function"] + // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object + args := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix) + d, _ := json.Marshal(args) + + return func_name.(string), string(d) +} diff --git a/api/openai/request.go b/api/openai/request.go index 382a930e..6a7a14e8 100644 --- a/api/openai/request.go +++ b/api/openai/request.go @@ -13,6 +13,7 @@ import ( fiberContext "github.com/go-skynet/LocalAI/api/ctx" options "github.com/go-skynet/LocalAI/api/options" "github.com/go-skynet/LocalAI/api/schema" + "github.com/go-skynet/LocalAI/pkg/grammar" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" @@ -136,6 +137,20 @@ func updateRequestConfig(config *config.Config, input *schema.OpenAIRequest) { } } + if len(input.Tools) > 0 { + for _, tool := range input.Tools { + input.Functions = append(input.Functions, tool.Function) + } + } + + if input.ToolsChoice != nil { + var toolChoice grammar.Tool + json.Unmarshal([]byte(input.ToolsChoice.(string)), &toolChoice) + input.FunctionCall = map[string]interface{}{ + "name": toolChoice.Function.Name, + } + } + // Decode each request's message content index := 0 for i, m := range input.Messages { diff --git a/api/schema/openai.go b/api/schema/openai.go index 6355ff63..12a39b42 100644 --- a/api/schema/openai.go +++ b/api/schema/openai.go @@ -68,6 +68,10 @@ type ContentURL struct { type Message struct { // The message role Role string `json:"role,omitempty" yaml:"role"` + + // The message name (used for tools calls) + Name string `json:"name,omitempty" yaml:"name"` + // The message content Content interface{} `json:"content" yaml:"content"` @@ -76,6 +80,20 @@ type Message struct { // A result of a function call FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"` + + ToolCalls []ToolCall `json:"tool_calls,omitempty" yaml:"tool_call,omitempty"` +} + +type ToolCall struct { + Index int `json:"index"` + ID string `json:"id"` + Type string `json:"type"` + FunctionCall FunctionCall `json:"function"` +} + +type FunctionCall struct { + Name string `json:"name,omitempty"` + Arguments string `json:"arguments"` } type OpenAIModel struct { @@ -117,6 +135,9 @@ type OpenAIRequest struct { Functions []grammar.Function `json:"functions" yaml:"functions"` FunctionCall interface{} `json:"function_call" yaml:"function_call"` // might be a string or an object + Tools []grammar.Tool `json:"tools,omitempty" yaml:"tools"` + ToolsChoice interface{} `json:"tool_choice,omitempty" 
yaml:"tool_choice"` + Stream bool `json:"stream"` // Image (not supported by OpenAI) diff --git a/pkg/grammar/functions.go b/pkg/grammar/functions.go index ef56662b..1038f5e6 100644 --- a/pkg/grammar/functions.go +++ b/pkg/grammar/functions.go @@ -11,6 +11,12 @@ type Function struct { } type Functions []Function +type Tool struct { + Type string `json:"type"` + Function Function `json:"function,omitempty"` +} +type Tools []Tool + func (f Functions) ToJSONStructure() JSONFunctionStructure { js := JSONFunctionStructure{} for _, function := range f { diff --git a/pkg/model/loader.go b/pkg/model/loader.go index 37c2a603..bea32fb7 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -33,6 +33,7 @@ type ChatMessageTemplateData struct { SystemPrompt string Role string RoleName string + FunctionName string Content string MessageIndex int } From 01205fd4c0e606f7d8e68b23bd3c74fb4a032b3e Mon Sep 17 00:00:00 2001 From: Steven Christou <1302212+christ66@users.noreply.github.com> Date: Sun, 18 Feb 2024 02:12:02 -0800 Subject: [PATCH 0068/2895] Initial implementation of upload files api. (#1703) * Initial implementation of upload files api. * Move sanitize method to utils. * Save uploaded data to uploads folder. * Avoid loop if we do not have a purpose. * Minor cleanup of api and fix bug where deleting duplicate filename cause error. * Revert defer of saving config * Moved creation of directory to startup. * Make file names unique when storing on disk. * Add test for files api. * Update dependencies. --- api/api.go | 16 +++ api/openai/files.go | 207 ++++++++++++++++++++++++++++ api/openai/files_test.go | 286 +++++++++++++++++++++++++++++++++++++++ api/options/options.go | 7 + go.mod | 4 +- go.sum | 2 - main.go | 7 + pkg/utils/path.go | 12 ++ 8 files changed, 538 insertions(+), 3 deletions(-) create mode 100644 api/openai/files.go create mode 100644 api/openai/files_test.go diff --git a/api/api.go b/api/api.go index 946204d2..4442421e 100644 --- a/api/api.go +++ b/api/api.go @@ -223,8 +223,12 @@ func App(opts ...options.AppOption) (*fiber.App, error) { // Make sure directories exists os.MkdirAll(options.ImageDir, 0755) os.MkdirAll(options.AudioDir, 0755) + os.MkdirAll(options.UploadDir, 0755) os.MkdirAll(options.Loader.ModelPath, 0755) + // Load upload json + openai.LoadUploadConfig(options.UploadDir) + modelGalleryService := localai.CreateModelGalleryService(options.Galleries, options.Loader.ModelPath, galleryService) app.Post("/models/apply", auth, modelGalleryService.ApplyModelGalleryEndpoint()) app.Get("/models/available", auth, modelGalleryService.ListModelFromGalleryEndpoint()) @@ -244,6 +248,18 @@ func App(opts ...options.AppOption) (*fiber.App, error) { app.Post("/v1/edits", auth, openai.EditEndpoint(cl, options)) app.Post("/edits", auth, openai.EditEndpoint(cl, options)) + // files + app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, options)) + app.Post("/files", auth, openai.UploadFilesEndpoint(cl, options)) + app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, options)) + app.Get("/files", auth, openai.ListFilesEndpoint(cl, options)) + app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, options)) + app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, options)) + app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, options)) + app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, options)) + app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, options)) + app.Get("/files/:file_id/content", auth, 
openai.GetFilesContentsEndpoint(cl, options)) + // completion app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, options)) app.Post("/completions", auth, openai.CompletionEndpoint(cl, options)) diff --git a/api/openai/files.go b/api/openai/files.go new file mode 100644 index 00000000..f19e79d8 --- /dev/null +++ b/api/openai/files.go @@ -0,0 +1,207 @@ +package openai + +import ( + "encoding/json" + "errors" + "fmt" + config "github.com/go-skynet/LocalAI/api/config" + "github.com/go-skynet/LocalAI/api/options" + "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/gofiber/fiber/v2" + "github.com/rs/zerolog/log" + "os" + "path/filepath" + "time" +) + +var uploadedFiles []File + +// File represents the structure of a file object from the OpenAI API. +type File struct { + ID string `json:"id"` // Unique identifier for the file + Object string `json:"object"` // Type of the object (e.g., "file") + Bytes int `json:"bytes"` // Size of the file in bytes + CreatedAt time.Time `json:"created_at"` // The time at which the file was created + Filename string `json:"filename"` // The name of the file + Purpose string `json:"purpose"` // The purpose of the file (e.g., "fine-tune", "classifications", etc.) +} + +func saveUploadConfig(uploadDir string) { + file, err := json.MarshalIndent(uploadedFiles, "", " ") + if err != nil { + log.Error().Msgf("Failed to JSON marshal the uploadedFiles: %s", err) + } + + err = os.WriteFile(filepath.Join(uploadDir, "uploadedFiles.json"), file, 0644) + if err != nil { + log.Error().Msgf("Failed to save uploadedFiles to file: %s", err) + } +} + +func LoadUploadConfig(uploadPath string) { + file, err := os.ReadFile(filepath.Join(uploadPath, "uploadedFiles.json")) + if err != nil { + log.Error().Msgf("Failed to read file: %s", err) + } else { + err = json.Unmarshal(file, &uploadedFiles) + if err != nil { + log.Error().Msgf("Failed to JSON unmarshal the file into uploadedFiles: %s", err) + } + } +} + +// UploadFilesEndpoint https://platform.openai.com/docs/api-reference/files/create +func UploadFilesEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + file, err := c.FormFile("file") + if err != nil { + return err + } + + // Check the file size + if file.Size > int64(o.UploadLimitMB*1024*1024) { + return c.Status(fiber.StatusBadRequest).SendString(fmt.Sprintf("File size %d exceeds upload limit %d", file.Size, o.UploadLimitMB)) + } + + purpose := c.FormValue("purpose", "") //TODO put in purpose dirs + if purpose == "" { + return c.Status(fiber.StatusBadRequest).SendString("Purpose is not defined") + } + + // Sanitize the filename to prevent directory traversal + filename := utils.SanitizeFileName(file.Filename) + + savePath := filepath.Join(o.UploadDir, filename) + + // Check if file already exists + if _, err := os.Stat(savePath); !os.IsNotExist(err) { + return c.Status(fiber.StatusBadRequest).SendString("File already exists") + } + + err = c.SaveFile(file, savePath) + if err != nil { + return c.Status(fiber.StatusInternalServerError).SendString("Failed to save file: " + err.Error()) + } + + f := File{ + ID: fmt.Sprintf("file-%d", time.Now().Unix()), + Object: "file", + Bytes: int(file.Size), + CreatedAt: time.Now(), + Filename: file.Filename, + Purpose: purpose, + } + + uploadedFiles = append(uploadedFiles, f) + saveUploadConfig(o.UploadDir) + return c.Status(fiber.StatusOK).JSON(f) + } +} + +// ListFilesEndpoint https://platform.openai.com/docs/api-reference/files/list +func ListFilesEndpoint(cm 
*config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { + type ListFiles struct { + Data []File + Object string + } + + return func(c *fiber.Ctx) error { + var listFiles ListFiles + + purpose := c.Query("purpose") + if purpose == "" { + listFiles.Data = uploadedFiles + } else { + for _, f := range uploadedFiles { + if purpose == f.Purpose { + listFiles.Data = append(listFiles.Data, f) + } + } + } + listFiles.Object = "list" + return c.Status(fiber.StatusOK).JSON(listFiles) + } +} + +func getFileFromRequest(c *fiber.Ctx) (*File, error) { + id := c.Params("file_id") + if id == "" { + return nil, fmt.Errorf("file_id parameter is required") + } + + for _, f := range uploadedFiles { + if id == f.ID { + return &f, nil + } + } + + return nil, fmt.Errorf("unable to find file id %s", id) +} + +// GetFilesEndpoint https://platform.openai.com/docs/api-reference/files/retrieve +func GetFilesEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + file, err := getFileFromRequest(c) + if err != nil { + return c.Status(fiber.StatusInternalServerError).SendString(err.Error()) + } + + return c.JSON(file) + } +} + +// DeleteFilesEndpoint https://platform.openai.com/docs/api-reference/files/delete +func DeleteFilesEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { + type DeleteStatus struct { + Id string + Object string + Deleted bool + } + + return func(c *fiber.Ctx) error { + file, err := getFileFromRequest(c) + if err != nil { + return c.Status(fiber.StatusInternalServerError).SendString(err.Error()) + } + + err = os.Remove(filepath.Join(o.UploadDir, file.Filename)) + if err != nil { + // If the file doesn't exist then we should just continue to remove it + if !errors.Is(err, os.ErrNotExist) { + return c.Status(fiber.StatusInternalServerError).SendString(fmt.Sprintf("Unable to delete file: %s, %v", file.Filename, err)) + } + } + + // Remove upload from list + for i, f := range uploadedFiles { + if f.ID == file.ID { + uploadedFiles = append(uploadedFiles[:i], uploadedFiles[i+1:]...) 
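+ // the append above re-slices uploadedFiles to drop entry i; stop after the first match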
+ break + } + } + + saveUploadConfig(o.UploadDir) + return c.JSON(DeleteStatus{ + Id: file.ID, + Object: "file", + Deleted: true, + }) + } +} + +// GetFilesContentsEndpoint https://platform.openai.com/docs/api-reference/files/retrieve-contents +func GetFilesContentsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + file, err := getFileFromRequest(c) + if err != nil { + return c.Status(fiber.StatusInternalServerError).SendString(err.Error()) + } + + fileContents, err := os.ReadFile(filepath.Join(o.UploadDir, file.Filename)) + if err != nil { + return c.Status(fiber.StatusInternalServerError).SendString(err.Error()) + } + + return c.Send(fileContents) + } +} diff --git a/api/openai/files_test.go b/api/openai/files_test.go new file mode 100644 index 00000000..cb111b4a --- /dev/null +++ b/api/openai/files_test.go @@ -0,0 +1,286 @@ +package openai + +import ( + "encoding/json" + "fmt" + config "github.com/go-skynet/LocalAI/api/config" + "github.com/go-skynet/LocalAI/api/options" + utils2 "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/gofiber/fiber/v2" + "github.com/stretchr/testify/assert" + "io" + "mime/multipart" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + + "testing" +) + +type ListFiles struct { + Data []File + Object string +} + +func startUpApp() (app *fiber.App, option *options.Option, loader *config.ConfigLoader) { + // Preparing the mocked objects + loader = &config.ConfigLoader{} + + option = &options.Option{ + UploadLimitMB: 10, + UploadDir: "test_dir", + } + + _ = os.RemoveAll(option.UploadDir) + + app = fiber.New(fiber.Config{ + BodyLimit: 20 * 1024 * 1024, // sets the limit to 20MB. + }) + + // Create a Test Server + app.Post("/files", UploadFilesEndpoint(loader, option)) + app.Get("/files", ListFilesEndpoint(loader, option)) + app.Get("/files/:file_id", GetFilesEndpoint(loader, option)) + app.Delete("/files/:file_id", DeleteFilesEndpoint(loader, option)) + app.Get("/files/:file_id/content", GetFilesContentsEndpoint(loader, option)) + + return +} + +func TestUploadFileExceedSizeLimit(t *testing.T) { + // Preparing the mocked objects + loader := &config.ConfigLoader{} + + option := &options.Option{ + UploadLimitMB: 10, + UploadDir: "test_dir", + } + + _ = os.RemoveAll(option.UploadDir) + + app := fiber.New(fiber.Config{ + BodyLimit: 20 * 1024 * 1024, // sets the limit to 20MB. 
+ })
+
+ // Create a Test Server
+ app.Post("/files", UploadFilesEndpoint(loader, option))
+ app.Get("/files", ListFilesEndpoint(loader, option))
+ app.Get("/files/:file_id", GetFilesEndpoint(loader, option))
+ app.Delete("/files/:file_id", DeleteFilesEndpoint(loader, option))
+ app.Get("/files/:file_id/content", GetFilesContentsEndpoint(loader, option))
+
+ t.Run("UploadFilesEndpoint file size exceeds limit", func(t *testing.T) {
+ resp, err := CallFilesUploadEndpoint(t, app, "foo.txt", "file", "fine-tune", 11, option)
+ assert.NoError(t, err)
+
+ assert.Equal(t, fiber.StatusBadRequest, resp.StatusCode)
+ assert.Contains(t, bodyToString(resp, t), "exceeds upload limit")
+ })
+ t.Run("UploadFilesEndpoint purpose not defined", func(t *testing.T) {
+ resp, _ := CallFilesUploadEndpoint(t, app, "foo.txt", "file", "", 5, option)
+
+ assert.Equal(t, fiber.StatusBadRequest, resp.StatusCode)
+ assert.Contains(t, bodyToString(resp, t), "Purpose is not defined")
+ })
+ t.Run("UploadFilesEndpoint file already exists", func(t *testing.T) {
+ f1 := CallFilesUploadEndpointWithCleanup(t, app, "foo.txt", "file", "fine-tune", 5, option)
+
+ resp, err := CallFilesUploadEndpoint(t, app, "foo.txt", "file", "fine-tune", 5, option)
+ fmt.Println(f1)
+ fmt.Printf("Error: %v", err)
+
+ assert.Equal(t, fiber.StatusBadRequest, resp.StatusCode)
+ assert.Contains(t, bodyToString(resp, t), "File already exists")
+ })
+ t.Run("UploadFilesEndpoint file uploaded successfully", func(t *testing.T) {
+ file := CallFilesUploadEndpointWithCleanup(t, app, "test.txt", "file", "fine-tune", 5, option)
+
+ // Check if file exists in the disk
+ filePath := filepath.Join(option.UploadDir, utils2.SanitizeFileName("test.txt"))
+ _, err := os.Stat(filePath)
+
+ assert.False(t, os.IsNotExist(err))
+ assert.Equal(t, file.Bytes, 5242880)
+ assert.NotEmpty(t, file.CreatedAt)
+ assert.Equal(t, file.Filename, "test.txt")
+ assert.Equal(t, file.Purpose, "fine-tune")
+ })
+ t.Run("ListFilesEndpoint without purpose parameter", func(t *testing.T) {
+ resp, err := CallListFilesEndpoint(t, app, "")
+ assert.NoError(t, err)
+
+ assert.Equal(t, 200, resp.StatusCode)
+
+ listFiles := responseToListFile(t, resp)
+ if len(listFiles.Data) != len(uploadedFiles) {
+ t.Errorf("Expected %v files, got %v files", len(uploadedFiles), len(listFiles.Data))
+ }
+ })
+ t.Run("ListFilesEndpoint with valid purpose parameter", func(t *testing.T) {
+ _ = CallFilesUploadEndpointWithCleanup(t, app, "test.txt", "file", "fine-tune", 5, option)
+
+ resp, err := CallListFilesEndpoint(t, app, "fine-tune")
+ assert.NoError(t, err)
+
+ listFiles := responseToListFile(t, resp)
+ if len(listFiles.Data) != 1 {
+ t.Errorf("Expected 1 file, got %v files", len(listFiles.Data))
+ }
+ })
+ t.Run("ListFilesEndpoint with invalid query parameter", func(t *testing.T) {
+ resp, err := CallListFilesEndpoint(t, app, "not-so-fine-tune")
+ assert.NoError(t, err)
+ assert.Equal(t, 200, resp.StatusCode)
+
+ listFiles := responseToListFile(t, resp)
+
+ if len(listFiles.Data) != 0 {
+ t.Errorf("Expected 0 file, got %v files", len(listFiles.Data))
+ }
+ })
+ t.Run("GetFilesContentsEndpoint get file content", func(t *testing.T) {
+ req := httptest.NewRequest("GET", "/files", nil)
+ resp, _ := app.Test(req)
+ assert.Equal(t, 200, resp.StatusCode)
+
+ var listFiles ListFiles
+ if err := json.Unmarshal(bodyToByteArray(resp, t), &listFiles); err != nil {
+ t.Errorf("Failed to decode response: %v", err)
+ return
+ }
+
+ if len(listFiles.Data) != 0 {
+ t.Errorf("Expected 0 file, got %v files",
len(listFiles.Data)) + } + }) +} + +func CallListFilesEndpoint(t *testing.T, app *fiber.App, purpose string) (*http.Response, error) { + var target string + if purpose != "" { + target = fmt.Sprintf("/files?purpose=%s", purpose) + } else { + target = "/files" + } + req := httptest.NewRequest("GET", target, nil) + return app.Test(req) +} + +func CallFilesContentEndpoint(t *testing.T, app *fiber.App, fileId string) (*http.Response, error) { + request := httptest.NewRequest("GET", "/files?file_id="+fileId, nil) + return app.Test(request) +} + +func CallFilesUploadEndpoint(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, o *options.Option) (*http.Response, error) { + // Create a file that exceeds the limit + file := createTestFile(t, fileName, fileSize, o) + + // Creating a new HTTP Request + body, writer := newMultipartFile(file.Name(), tag, purpose) + + req := httptest.NewRequest(http.MethodPost, "/files", body) + req.Header.Set(fiber.HeaderContentType, writer.FormDataContentType()) + return app.Test(req) +} + +func CallFilesUploadEndpointWithCleanup(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, o *options.Option) File { + // Create a file that exceeds the limit + file := createTestFile(t, fileName, fileSize, o) + + // Creating a new HTTP Request + body, writer := newMultipartFile(file.Name(), tag, purpose) + + req := httptest.NewRequest(http.MethodPost, "/files", body) + req.Header.Set(fiber.HeaderContentType, writer.FormDataContentType()) + resp, err := app.Test(req) + assert.NoError(t, err) + f := responseToFile(t, resp) + + id := f.ID + t.Cleanup(func() { + _, err := CallFilesDeleteEndpoint(t, app, id) + assert.NoError(t, err) + }) + + return f + +} + +func CallFilesDeleteEndpoint(t *testing.T, app *fiber.App, fileId string) (*http.Response, error) { + target := fmt.Sprintf("/files/%s", fileId) + req := httptest.NewRequest(http.MethodDelete, target, nil) + return app.Test(req) +} + +// Helper to create multi-part file +func newMultipartFile(filePath, tag, purpose string) (*strings.Reader, *multipart.Writer) { + body := new(strings.Builder) + writer := multipart.NewWriter(body) + file, _ := os.Open(filePath) + defer file.Close() + part, _ := writer.CreateFormFile(tag, filepath.Base(filePath)) + io.Copy(part, file) + + if purpose != "" { + _ = writer.WriteField("purpose", purpose) + } + + writer.Close() + return strings.NewReader(body.String()), writer +} + +// Helper to create test files +func createTestFile(t *testing.T, name string, sizeMB int, option *options.Option) *os.File { + err := os.MkdirAll(option.UploadDir, 0755) + if err != nil { + + t.Fatalf("Error MKDIR: %v", err) + } + + file, _ := os.Create(name) + file.WriteString(strings.Repeat("a", sizeMB*1024*1024)) // sizeMB MB File + + t.Cleanup(func() { + os.Remove(name) + os.RemoveAll(option.UploadDir) + }) + return file +} + +func bodyToString(resp *http.Response, t *testing.T) string { + return string(bodyToByteArray(resp, t)) +} + +func bodyToByteArray(resp *http.Response, t *testing.T) []byte { + bodyBytes, err := io.ReadAll(resp.Body) + if err != nil { + t.Fatal(err) + } + return bodyBytes +} + +func responseToFile(t *testing.T, resp *http.Response) File { + var file File + responseToString := bodyToString(resp, t) + + err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&file) + if err != nil { + t.Errorf("Failed to decode response: %s", err) + } + + return file +} + +func responseToListFile(t *testing.T, resp *http.Response) ListFiles { + var listFiles 
ListFiles + responseToString := bodyToString(resp, t) + + err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&listFiles) + if err != nil { + fmt.Printf("Failed to decode response: %s", err) + } + + return listFiles +} diff --git a/api/options/options.go b/api/options/options.go index 8c066584..72aea1a3 100644 --- a/api/options/options.go +++ b/api/options/options.go @@ -21,6 +21,7 @@ type Option struct { Debug, DisableMessage bool ImageDir string AudioDir string + UploadDir string CORS bool PreloadJSONModels string PreloadModelsFromPath string @@ -249,6 +250,12 @@ func WithImageDir(imageDir string) AppOption { } } +func WithUploadDir(uploadDir string) AppOption { + return func(o *Option) { + o.UploadDir = uploadDir + } +} + func WithApiKeys(apiKeys []string) AppOption { return func(o *Option) { o.ApiKeys = apiKeys diff --git a/go.mod b/go.mod index 250a2361..bbd787b5 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,6 @@ require ( github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e github.com/go-audio/wav v1.1.0 github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1 - github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230714203132-ffb09d7dd71e github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 github.com/gofiber/fiber/v2 v2.50.0 github.com/google/uuid v1.3.1 @@ -28,6 +27,7 @@ require ( github.com/rs/zerolog v1.31.0 github.com/sashabaranov/go-openai v1.16.0 github.com/schollz/progressbar/v3 v3.13.1 + github.com/stretchr/testify v1.8.4 github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701 github.com/urfave/cli/v2 v2.25.7 github.com/valyala/fasthttp v1.50.0 @@ -55,6 +55,7 @@ require ( require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect github.com/dlclark/regexp2 v1.8.1 // indirect github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect github.com/go-logr/stdr v1.2.2 // indirect @@ -68,6 +69,7 @@ require ( github.com/nwaples/rardecode v1.1.0 // indirect github.com/pierrec/lz4/v4 v4.1.2 // indirect github.com/pkoukk/tiktoken-go v0.1.2 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect github.com/prometheus/common v0.44.0 // indirect github.com/prometheus/procfs v0.11.1 // indirect diff --git a/go.sum b/go.sum index fc00bf6e..20dfbfb4 100644 --- a/go.sum +++ b/go.sum @@ -43,8 +43,6 @@ github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1 h1:yXvc7QfGtoZ51tUW/YVjoTwAfh8HG88XU7UOrbNlz5Y= github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1/go.mod h1:fYjkCDRzC+oRLHSjQoajmYK6AmeJnmEanV27CClAcDc= -github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230714203132-ffb09d7dd71e h1:4reMY29i1eOZaRaSTMPNyXI7X8RMNxCTfDDBXYzrbr0= -github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230714203132-ffb09d7dd71e/go.mod h1:31j1odgFXP8hDSUVfH0zErKI5aYVP18ddYnPkwCso2A= github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 h1:WYjkXL0Nw7dN2uDBMVCWQ8xLavrIhjF/DLczuh5L9TY= github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo= github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= diff 
--git a/main.go b/main.go index edf70328..2636b402 100644 --- a/main.go +++ b/main.go @@ -142,6 +142,12 @@ func main() { EnvVars: []string{"AUDIO_PATH"}, Value: "/tmp/generated/audio", }, + &cli.StringFlag{ + Name: "upload-path", + Usage: "Path to store uploads from files api", + EnvVars: []string{"UPLOAD_PATH"}, + Value: "/tmp/localai/upload", + }, &cli.StringFlag{ Name: "backend-assets-path", Usage: "Path used to extract libraries that are required by some of the backends in runtime.", @@ -227,6 +233,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit options.WithDebug(ctx.Bool("debug")), options.WithImageDir(ctx.String("image-path")), options.WithAudioDir(ctx.String("audio-path")), + options.WithUploadDir(ctx.String("upload-path")), options.WithF16(ctx.Bool("f16")), options.WithStringGalleries(ctx.String("galleries")), options.WithModelLibraryURL(ctx.String("remote-library")), diff --git a/pkg/utils/path.go b/pkg/utils/path.go index 05481d2c..f95b0138 100644 --- a/pkg/utils/path.go +++ b/pkg/utils/path.go @@ -3,6 +3,7 @@ package utils import ( "fmt" "path/filepath" + "strings" ) func inTrustedRoot(path string, trustedRoot string) error { @@ -20,3 +21,14 @@ func VerifyPath(path, basePath string) error { c := filepath.Clean(filepath.Join(basePath, path)) return inTrustedRoot(c, filepath.Clean(basePath)) } + +// SanitizeFileName sanitizes the given filename +func SanitizeFileName(fileName string) string { + // filepath.Clean to clean the path + cleanName := filepath.Clean(fileName) + // filepath.Base to ensure we only get the final element, not any directory path + baseName := filepath.Base(cleanName) + // Replace any remaining tricky characters that might have survived cleaning + safeName := strings.ReplaceAll(baseName, "..", "") + return safeName +} From 51b67a247ab0c454e1cc21332748633a13cecd3d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 18 Feb 2024 13:37:16 +0100 Subject: [PATCH 0069/2895] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index af47a5e9..6c34d3e5 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,9 @@ [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) +- Upload file API: https://github.com/mudler/LocalAI/pull/1703 +- Tools support: https://github.com/mudler/LocalAI/pull/1715 +- LLaVa 1.6: https://github.com/mudler/LocalAI/pull/1714 - ROCm container images: https://github.com/mudler/LocalAI/pull/1595 - Intel GPU support (sycl): https://github.com/mudler/LocalAI/issues/1653 - Deprecation of old backends: https://github.com/mudler/LocalAI/issues/1651 @@ -54,9 +57,6 @@ Hot topics (looking for contributors): - Backends v2: https://github.com/mudler/LocalAI/issues/1126 - Improving UX v2: https://github.com/mudler/LocalAI/issues/1373 -- Tools support: https://github.com/mudler/LocalAI/pull/1715 -- LLaVa 1.6: https://github.com/mudler/LocalAI/pull/1714 -- Upload file API: https://github.com/mudler/LocalAI/pull/1703 If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22 From 4ec50bfc41b1ee0e7c216aba5817cb3169017e6f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 19 Feb 2024 19:03:09 +0100 Subject: [PATCH 0070/2895] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 6c34d3e5..45b11328 
100644 --- a/README.md +++ b/README.md @@ -57,6 +57,7 @@ Hot topics (looking for contributors): - Backends v2: https://github.com/mudler/LocalAI/issues/1126 - Improving UX v2: https://github.com/mudler/LocalAI/issues/1373 +- Assistant API: https://github.com/mudler/LocalAI/issues/1273 If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22 From 9f2235c208b8a490f105774f984aa7225c4642b7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 19 Feb 2024 19:49:00 +0100 Subject: [PATCH 0071/2895] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 45b11328..88ea64f2 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) +- Multiple tools calls: https://github.com/mudler/LocalAI/pull/1726 - Upload file API: https://github.com/mudler/LocalAI/pull/1703 - Tools support: https://github.com/mudler/LocalAI/pull/1715 - LLaVa 1.6: https://github.com/mudler/LocalAI/pull/1714 From ed3b50622bc04b0d3e07bd59aa1407ed948f30d9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 20 Feb 2024 19:55:36 +0100 Subject: [PATCH 0072/2895] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 88ea64f2..46068031 100644 --- a/README.md +++ b/README.md @@ -43,9 +43,9 @@ [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) -- Multiple tools calls: https://github.com/mudler/LocalAI/pull/1726 +- Parallel function calling: https://github.com/mudler/LocalAI/pull/1726 - Upload file API: https://github.com/mudler/LocalAI/pull/1703 -- Tools support: https://github.com/mudler/LocalAI/pull/1715 +- Tools API support: https://github.com/mudler/LocalAI/pull/1715 - LLaVa 1.6: https://github.com/mudler/LocalAI/pull/1714 - ROCm container images: https://github.com/mudler/LocalAI/pull/1595 - Intel GPU support (sycl): https://github.com/mudler/LocalAI/issues/1653 From 960d314e4ffc447356932c6e8d9f9155ebc8da12 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 20 Feb 2024 21:58:45 +0100 Subject: [PATCH 0073/2895] feat(tools): Parallel function calling (#1726) feat(tools): support returning multiple tools choices Fixes: https://github.com/mudler/LocalAI/issues/1275 --- api/config/config.go | 1 + api/openai/chat.go | 243 +++++++++++++++++++------------- pkg/grammar/json_schema.go | 31 +++- pkg/grammar/json_schema_test.go | 68 ++++++++- 4 files changed, 235 insertions(+), 108 deletions(-) diff --git a/api/config/config.go b/api/config/config.go index 48d1b791..5ea16828 100644 --- a/api/config/config.go +++ b/api/config/config.go @@ -148,6 +148,7 @@ type Functions struct { DisableNoAction bool `yaml:"disable_no_action"` NoActionFunctionName string `yaml:"no_action_function_name"` NoActionDescriptionName string `yaml:"no_action_description_name"` + ParallelCalls bool `yaml:"parallel_calls"` } type TemplateConfig struct { diff --git a/api/openai/chat.go b/api/openai/chat.go index 68c3a291..d34f2a0c 100644 --- a/api/openai/chat.go +++ b/api/openai/chat.go @@ -64,11 +64,11 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) return true }) - ss := map[string]interface{}{} - name, args := parseFunctionCall(result) - ss["name"], ss["arguments"] = name, args + results := 
parseFunctionCall(result, config.FunctionsConfig.ParallelCalls) + noActionToRun := len(results) > 0 && results[0].name == noAction - if name == noAction { + switch { + case noActionToRun: initialMessage := schema.OpenAIResponse{ ID: id, Created: created, @@ -78,7 +78,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) } responses <- initialMessage - result, err := handleQuestion(config, req, o, args, prompt) + result, err := handleQuestion(config, req, o, results[0].arguments, prompt) if err != nil { log.Error().Msgf("error handling question: %s", err.Error()) return @@ -98,52 +98,56 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) } responses <- resp - close(responses) - return + + default: + for i, ss := range results { + name, args := ss.name, ss.arguments + + initialMessage := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{ + Delta: &schema.Message{ + Role: "assistant", + ToolCalls: []schema.ToolCall{ + { + Index: i, + ID: id, + Type: "function", + FunctionCall: schema.FunctionCall{ + Name: name, + }, + }, + }, + }}}, + Object: "chat.completion.chunk", + } + responses <- initialMessage + + responses <- schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{ + Delta: &schema.Message{ + Role: "assistant", + ToolCalls: []schema.ToolCall{ + { + Index: i, + ID: id, + Type: "function", + FunctionCall: schema.FunctionCall{ + Arguments: args, + }, + }, + }, + }}}, + Object: "chat.completion.chunk", + } + } } - initialMessage := schema.OpenAIResponse{ - ID: id, - Created: created, - Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{ - Delta: &schema.Message{ - Role: "assistant", - ToolCalls: []schema.ToolCall{ - { - Index: 0, - ID: id, - Type: "function", - FunctionCall: schema.FunctionCall{ - Name: name, - }, - }, - }, - }}}, - Object: "chat.completion.chunk", - } - responses <- initialMessage - - responses <- schema.OpenAIResponse{ - ID: id, - Created: created, - Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{ - Delta: &schema.Message{ - Role: "assistant", - ToolCalls: []schema.ToolCall{ - { - Index: 0, - ID: id, - Type: "function", - FunctionCall: schema.FunctionCall{ - Arguments: args, - }, - }, - }, - }}}, - Object: "chat.completion.chunk", - } close(responses) } @@ -208,9 +212,9 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) // Update input grammar jsStruct := funcs.ToJSONStructure() - config.Grammar = jsStruct.Grammar("") + config.Grammar = jsStruct.Grammar("", config.FunctionsConfig.ParallelCalls) } else if input.JSONFunctionGrammarObject != nil { - config.Grammar = input.JSONFunctionGrammarObject.Grammar("") + config.Grammar = input.JSONFunctionGrammarObject.Grammar("", config.FunctionsConfig.ParallelCalls) } // functions are not supported in stream mode (yet?) 
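With ParallelCalls enabled, the grammar produced above lets the model emit a JSON
array of function calls instead of a single object; parseFunctionCall (further down
in this patch) then returns one funcCallResults entry per array element. A minimal
sketch of the shape the model is expected to produce, reusing the create_event and
search functions from the json_schema tests below (all values are placeholders):

[
  {"function": "create_event", "arguments": {"title": "...", "date": "...", "time": "..."}},
  {"function": "search", "arguments": {"query": "..."}}
]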
@@ -407,57 +411,74 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) })) return nil + // no streaming mode default: result, tokenUsage, err := ComputeChoices(input, predInput, config, o, o.Loader, func(s string, c *[]schema.Choice) { - if processFunctions { - ss := map[string]interface{}{} + if !processFunctions { + // no function is called, just reply and use stop as finish reason + *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}}) + return + } - name, args := parseFunctionCall(s) - ss["name"], ss["arguments"] = name, args + results := parseFunctionCall(s, config.FunctionsConfig.ParallelCalls) + noActionsToRun := len(results) > 0 && results[0].name == noActionName - // if do nothing, reply with a message - if name == noActionName { - result, err := handleQuestion(config, input, o, args, predInput) - if err != nil { - log.Error().Msgf("error handling question: %s", err.Error()) - return - } - *c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &result}}) - } else { + switch { + case noActionsToRun: + result, err := handleQuestion(config, input, o, results[0].arguments, predInput) + if err != nil { + log.Error().Msgf("error handling question: %s", err.Error()) + return + } + *c = append(*c, schema.Choice{ + Message: &schema.Message{Role: "assistant", Content: &result}}) + default: + toolChoice := schema.Choice{ + Message: &schema.Message{ + Role: "assistant", + }, + } + + if len(input.Tools) > 0 { + toolChoice.FinishReason = "tool_calls" + } + + for _, ss := range results { + name, args := ss.name, ss.arguments if len(input.Tools) > 0 { - // Result is different in the case we have a tool call - *c = append(*c, schema.Choice{ - FinishReason: "tool_calls", - Message: &schema.Message{ - Role: "assistant", - ToolCalls: []schema.ToolCall{ - { - ID: id, - Type: "function", - FunctionCall: schema.FunctionCall{ - Name: name, - Arguments: args, - }, - }, + // If we are using tools, we condense the function calls into + // a single response choice with all the tools + toolChoice.Message.ToolCalls = append(toolChoice.Message.ToolCalls, + schema.ToolCall{ + ID: id, + Type: "function", + FunctionCall: schema.FunctionCall{ + Name: name, + Arguments: args, }, }, - }) + ) } else { - // otherwise reply with the function call + // otherwise we return more choices directly *c = append(*c, schema.Choice{ FinishReason: "function_call", Message: &schema.Message{ - Role: "assistant", - FunctionCall: ss, + Role: "assistant", + FunctionCall: map[string]interface{}{ + "name": name, + "arguments": args, + }, }, }) } } - return + if len(input.Tools) > 0 { + // we need to append our result if we are using tools + *c = append(*c, toolChoice) + } } - *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}}) }, nil) if err != nil { return err @@ -528,19 +549,43 @@ func handleQuestion(config *config.Config, input *schema.OpenAIRequest, o *optio return backend.Finetune(*config, prompt, prediction.Response), nil } -func parseFunctionCall(llmresult string) (string, string) { - // As we have to change the result before processing, we can't stream the answer token-by-token (yet?) 
- ss := map[string]interface{}{}
- // This prevent newlines to break JSON parsing for clients
- s := utils.EscapeNewLines(llmresult)
- json.Unmarshal([]byte(s), &ss)
- log.Debug().Msgf("Function return: %s %+v", s, ss)
-
- // The grammar defines the function name as "function", while OpenAI returns "name"
- func_name := ss["function"]
- // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object
- args := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
- d, _ := json.Marshal(args)
-
- return func_name.(string), string(d)
+type funcCallResults struct {
+ name string
+ arguments string
+}
+
+func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults {
+ results := []funcCallResults{}
+
+ // TODO: use generics to avoid this code duplication
+ if multipleResults {
+ ss := []map[string]interface{}{}
+ s := utils.EscapeNewLines(llmresult)
+ json.Unmarshal([]byte(s), &ss)
+ log.Debug().Msgf("Function return: %s %+v", s, ss)
+
+ for _, s := range ss {
+ func_name := s["function"]
+ args := s["arguments"]
+ d, _ := json.Marshal(args)
+ results = append(results, funcCallResults{name: func_name.(string), arguments: string(d)})
+ }
+ } else {
+ // As we have to change the result before processing, we can't stream the answer token-by-token (yet?)
+ ss := map[string]interface{}{}
+ // This prevents newlines from breaking JSON parsing for clients
+ s := utils.EscapeNewLines(llmresult)
+ json.Unmarshal([]byte(s), &ss)
+ log.Debug().Msgf("Function return: %s %+v", s, ss)
+
+ // The grammar defines the function name as "function", while OpenAI returns "name"
+ func_name := ss["function"]
+ // Similarly, while here arguments is a map[string]interface{}, OpenAI actually wants a stringified object
+ args := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
+ d, _ := json.Marshal(args)
+
+ results = append(results, funcCallResults{name: func_name.(string), arguments: string(d)})
+ }
+
+ return results
 }
diff --git a/pkg/grammar/json_schema.go b/pkg/grammar/json_schema.go
index 40d7f4e6..76f9778f 100644
--- a/pkg/grammar/json_schema.go
+++ b/pkg/grammar/json_schema.go
@@ -105,11 +105,28 @@ func (sc *JSONSchemaConverter) addRule(name, rule string) string {
 return key
 }

-func (sc *JSONSchemaConverter) formatGrammar() string {
+const array = `arr ::=
+ "[\n" (
+ realvalue
+ (",\n" realvalue)*
+ )? "]"`
+
+func (sc *JSONSchemaConverter) finalizeGrammar(maybeArray bool) string {
 var lines []string
+
+ // write down the computed rules.
+ // if maybeArray is true, we need to add the array rule and slightly tweak the root rule for name, rule := range sc.rules { + if maybeArray && name == "root" { + name = "realvalue" + } lines = append(lines, fmt.Sprintf("%s ::= %s", name, rule)) } + + if maybeArray { + lines = append(lines, fmt.Sprintf("%s ::= %s", "root", "arr | realvalue")) + lines = append(lines, array) + } + return strings.Join(lines, "\n") } @@ -234,15 +251,15 @@ func (sc *JSONSchemaConverter) resolveReference(ref string, rootSchema map[strin return def } -func (sc *JSONSchemaConverter) Grammar(schema map[string]interface{}) string { +func (sc *JSONSchemaConverter) Grammar(schema map[string]interface{}, maybeArray bool) string { sc.visit(schema, "", schema) - return sc.formatGrammar() + return sc.finalizeGrammar(maybeArray) } -func (sc *JSONSchemaConverter) GrammarFromBytes(b []byte) string { +func (sc *JSONSchemaConverter) GrammarFromBytes(b []byte, maybeArray bool) string { var schema map[string]interface{} _ = json.Unmarshal(b, &schema) - return sc.Grammar(schema) + return sc.Grammar(schema, maybeArray) } func jsonString(v interface{}) string { @@ -275,7 +292,7 @@ type JSONFunctionStructure struct { Defs map[string]interface{} `json:"$defs,omitempty"` } -func (j JSONFunctionStructure) Grammar(propOrder string) string { +func (j JSONFunctionStructure) Grammar(propOrder string, maybeArray bool) string { dat, _ := json.Marshal(j) - return NewJSONSchemaConverter(propOrder).GrammarFromBytes(dat) + return NewJSONSchemaConverter(propOrder).GrammarFromBytes(dat, maybeArray) } diff --git a/pkg/grammar/json_schema_test.go b/pkg/grammar/json_schema_test.go index 9d4086cb..39d2a4d5 100644 --- a/pkg/grammar/json_schema_test.go +++ b/pkg/grammar/json_schema_test.go @@ -52,13 +52,32 @@ string ::= "\"" ( [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) )* "\"" space +root-1-function ::= "\"search\""` + + inputResult2 = `root-0-function ::= "\"create_event\"" +root-0 ::= "{" space "\"arguments\"" space ":" space root-0-arguments "," space "\"function\"" space ":" space root-0-function "}" space +root-1-arguments ::= "{" space "\"query\"" space ":" space string "}" space +realvalue ::= root-0 | root-1 +root ::= arr | realvalue +space ::= " "? +root-0-arguments ::= "{" space "\"date\"" space ":" space string "," space "\"time\"" space ":" space string "," space "\"title\"" space ":" space string "}" space +root-1 ::= "{" space "\"arguments\"" space ":" space root-1-arguments "," space "\"function\"" space ":" space root-1-function "}" space +string ::= "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) +)* "\"" space +arr ::= + "[\n" ( + realvalue + (",\n" realvalue)* + )? 
"]" root-1-function ::= "\"search\""` ) var _ = Describe("JSON schema grammar tests", func() { Context("JSON", func() { It("generates a valid grammar from JSON schema", func() { - grammar := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput1)) + grammar := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput1), false) results := strings.Split(inputResult1, "\n") for _, r := range results { if r != "" { @@ -103,7 +122,7 @@ var _ = Describe("JSON schema grammar tests", func() { }, }} - grammar := structuredGrammar.Grammar("") + grammar := structuredGrammar.Grammar("", false) results := strings.Split(inputResult1, "\n") for _, r := range results { if r != "" { @@ -112,5 +131,50 @@ var _ = Describe("JSON schema grammar tests", func() { } Expect(len(results)).To(Equal(len(strings.Split(grammar, "\n")))) }) + + It("generates a valid grammar from JSON Objects for multiple function return", func() { + structuredGrammar := JSONFunctionStructure{ + OneOf: []Item{ + { + Type: "object", + Properties: Properties{ + Function: FunctionName{ + Const: "create_event", + }, + Arguments: Argument{ // this is OpenAI's parameter + Type: "object", + Properties: map[string]interface{}{ + "title": map[string]string{"type": "string"}, + "date": map[string]string{"type": "string"}, + "time": map[string]string{"type": "string"}, + }, + }, + }, + }, + { + Type: "object", + Properties: Properties{ + Function: FunctionName{ + Const: "search", + }, + Arguments: Argument{ + Type: "object", + Properties: map[string]interface{}{ + "query": map[string]string{"type": "string"}, + }, + }, + }, + }, + }} + + grammar := structuredGrammar.Grammar("", true) + results := strings.Split(inputResult2, "\n") + for _, r := range results { + if r != "" { + Expect(grammar).To(ContainSubstring(r)) + } + } + Expect(len(results)).To(Equal(len(strings.Split(grammar, "\n"))), grammar) + }) }) }) From 594eb468df621efccbf585e52c226acb8b382dfb Mon Sep 17 00:00:00 2001 From: Chakib Benziane Date: Tue, 20 Feb 2024 21:59:43 +0100 Subject: [PATCH 0074/2895] Add TTS dependency for cuda based builds fixes #1727 (#1730) Signed-off-by: Chakib Benziane --- backend/python/common-env/transformers/transformers-nvidia.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/python/common-env/transformers/transformers-nvidia.yml b/backend/python/common-env/transformers/transformers-nvidia.yml index 62133559..d5fe07b4 100644 --- a/backend/python/common-env/transformers/transformers-nvidia.yml +++ b/backend/python/common-env/transformers/transformers-nvidia.yml @@ -33,6 +33,7 @@ dependencies: - boto3==1.28.61 - botocore==1.31.61 - certifi==2023.7.22 + - TTS==0.22.0 - charset-normalizer==3.3.0 - datasets==2.14.5 - sentence-transformers==2.2.2 From 255748bcba19ebc63ad7e0f4f701ccb027aab481 Mon Sep 17 00:00:00 2001 From: Dave Date: Tue, 20 Feb 2024 20:21:19 -0500 Subject: [PATCH 0075/2895] MQTT Startup Refactoring Part 1: core/ packages part 1 (#1728) This PR specifically introduces a `core` folder and moves the following packages over, without any other changes: - `api/backend` - `api/config` - `api/options` - `api/schema` Once this is merged and we confirm there's no regressions, I can migrate over the remaining changes piece by piece to split up application startup, backend services, http, and mqtt as was the goal of the earlier PRs! 
--- api/localai/backend_monitor.go | 4 ++-- api/localai/gallery.go | 2 +- api/localai/localai.go | 6 +++--- api/openai/chat.go | 8 ++++---- api/openai/completion.go | 8 ++++---- api/openai/edit.go | 8 ++++---- api/openai/embeddings.go | 8 ++++---- api/openai/files.go | 11 ++++++----- api/openai/files_test.go | 11 ++++++----- api/openai/image.go | 8 ++++---- api/openai/inference.go | 8 ++++---- api/openai/list.go | 4 ++-- api/openai/request.go | 6 +++--- api/openai/transcription.go | 6 +++--- backend/go/transcribe/transcript.go | 2 +- backend/go/transcribe/whisper.go | 2 +- {api => core}/backend/embeddings.go | 4 ++-- {api => core}/backend/image.go | 4 ++-- {api => core}/backend/llm.go | 4 ++-- {api => core}/backend/options.go | 4 ++-- {api => core}/backend/transcript.go | 6 +++--- {api => core}/backend/tts.go | 7 +++---- {api => core}/config/config.go | 2 +- {api => core}/config/config_test.go | 6 +++--- {api => core}/config/prediction.go | 2 +- {api => core/http}/api.go | 8 ++++---- {api => core/http}/api_test.go | 6 +++--- {api => core/http}/apt_suite_test.go | 2 +- {api => core}/options/options.go | 0 {api => core}/schema/openai.go | 2 +- {api => core}/schema/whisper.go | 0 main.go | 8 ++++---- pkg/grpc/backend.go | 3 ++- pkg/grpc/base/base.go | 2 +- pkg/grpc/client.go | 2 +- pkg/grpc/embed.go | 5 +++-- pkg/grpc/interface.go | 2 +- tests/integration/reflect_test.go | 2 +- 38 files changed, 93 insertions(+), 90 deletions(-) rename {api => core}/backend/embeddings.go (95%) rename {api => core}/backend/image.go (94%) rename {api => core}/backend/llm.go (97%) rename {api => core}/backend/options.go (97%) rename {api => core}/backend/transcript.go (86%) rename {api => core}/backend/tts.go (90%) rename {api => core}/config/config.go (99%) rename {api => core}/config/config_test.go (93%) rename {api => core}/config/prediction.go (99%) rename {api => core/http}/api.go (98%) rename {api => core/http}/api_test.go (99%) rename {api => core/http}/apt_suite_test.go (90%) rename {api => core}/options/options.go (100%) rename {api => core}/schema/openai.go (98%) rename {api => core}/schema/whisper.go (100%) diff --git a/api/localai/backend_monitor.go b/api/localai/backend_monitor.go index 8cb0bb45..e6f1b409 100644 --- a/api/localai/backend_monitor.go +++ b/api/localai/backend_monitor.go @@ -5,10 +5,10 @@ import ( "fmt" "strings" - config "github.com/go-skynet/LocalAI/api/config" + config "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/pkg/grpc/proto" - "github.com/go-skynet/LocalAI/api/options" + "github.com/go-skynet/LocalAI/core/options" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" diff --git a/api/localai/gallery.go b/api/localai/gallery.go index a2ad5bd1..ee6f4d7d 100644 --- a/api/localai/gallery.go +++ b/api/localai/gallery.go @@ -11,7 +11,7 @@ import ( json "github.com/json-iterator/go" "gopkg.in/yaml.v3" - config "github.com/go-skynet/LocalAI/api/config" + config "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/pkg/gallery" "github.com/go-skynet/LocalAI/pkg/utils" diff --git a/api/localai/localai.go b/api/localai/localai.go index 3abe440e..9d5bbf6c 100644 --- a/api/localai/localai.go +++ b/api/localai/localai.go @@ -1,12 +1,12 @@ package localai import ( - "github.com/go-skynet/LocalAI/api/backend" - config "github.com/go-skynet/LocalAI/api/config" fiberContext "github.com/go-skynet/LocalAI/api/ctx" + "github.com/go-skynet/LocalAI/core/backend" + config "github.com/go-skynet/LocalAI/core/config" "github.com/rs/zerolog/log" - 
"github.com/go-skynet/LocalAI/api/options" + "github.com/go-skynet/LocalAI/core/options" "github.com/gofiber/fiber/v2" ) diff --git a/api/openai/chat.go b/api/openai/chat.go index d34f2a0c..78d02f96 100644 --- a/api/openai/chat.go +++ b/api/openai/chat.go @@ -8,10 +8,10 @@ import ( "strings" "time" - "github.com/go-skynet/LocalAI/api/backend" - config "github.com/go-skynet/LocalAI/api/config" - "github.com/go-skynet/LocalAI/api/options" - "github.com/go-skynet/LocalAI/api/schema" + "github.com/go-skynet/LocalAI/core/backend" + config "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/options" + "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/grammar" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/utils" diff --git a/api/openai/completion.go b/api/openai/completion.go index b098451d..af56625e 100644 --- a/api/openai/completion.go +++ b/api/openai/completion.go @@ -8,10 +8,10 @@ import ( "fmt" "time" - "github.com/go-skynet/LocalAI/api/backend" - config "github.com/go-skynet/LocalAI/api/config" - "github.com/go-skynet/LocalAI/api/options" - "github.com/go-skynet/LocalAI/api/schema" + "github.com/go-skynet/LocalAI/core/backend" + config "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/options" + "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/grammar" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" diff --git a/api/openai/edit.go b/api/openai/edit.go index 16679ae5..56b17920 100644 --- a/api/openai/edit.go +++ b/api/openai/edit.go @@ -5,10 +5,10 @@ import ( "fmt" "time" - "github.com/go-skynet/LocalAI/api/backend" - config "github.com/go-skynet/LocalAI/api/config" - "github.com/go-skynet/LocalAI/api/options" - "github.com/go-skynet/LocalAI/api/schema" + "github.com/go-skynet/LocalAI/core/backend" + config "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/options" + "github.com/go-skynet/LocalAI/core/schema" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/google/uuid" diff --git a/api/openai/embeddings.go b/api/openai/embeddings.go index 44feb373..198493e1 100644 --- a/api/openai/embeddings.go +++ b/api/openai/embeddings.go @@ -5,12 +5,12 @@ import ( "fmt" "time" - "github.com/go-skynet/LocalAI/api/backend" - config "github.com/go-skynet/LocalAI/api/config" - "github.com/go-skynet/LocalAI/api/schema" + "github.com/go-skynet/LocalAI/core/backend" + config "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" "github.com/google/uuid" - "github.com/go-skynet/LocalAI/api/options" + "github.com/go-skynet/LocalAI/core/options" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" ) diff --git a/api/openai/files.go b/api/openai/files.go index f19e79d8..57f5c48d 100644 --- a/api/openai/files.go +++ b/api/openai/files.go @@ -4,14 +4,15 @@ import ( "encoding/json" "errors" "fmt" - config "github.com/go-skynet/LocalAI/api/config" - "github.com/go-skynet/LocalAI/api/options" - "github.com/go-skynet/LocalAI/pkg/utils" - "github.com/gofiber/fiber/v2" - "github.com/rs/zerolog/log" "os" "path/filepath" "time" + + config "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/options" + "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/gofiber/fiber/v2" + "github.com/rs/zerolog/log" ) var uploadedFiles []File diff --git a/api/openai/files_test.go b/api/openai/files_test.go index cb111b4a..535cde8b 
100644 --- a/api/openai/files_test.go +++ b/api/openai/files_test.go @@ -3,11 +3,6 @@ package openai import ( "encoding/json" "fmt" - config "github.com/go-skynet/LocalAI/api/config" - "github.com/go-skynet/LocalAI/api/options" - utils2 "github.com/go-skynet/LocalAI/pkg/utils" - "github.com/gofiber/fiber/v2" - "github.com/stretchr/testify/assert" "io" "mime/multipart" "net/http" @@ -16,6 +11,12 @@ import ( "path/filepath" "strings" + config "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/options" + utils2 "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/gofiber/fiber/v2" + "github.com/stretchr/testify/assert" + "testing" ) diff --git a/api/openai/image.go b/api/openai/image.go index 07f028f0..2da6883e 100644 --- a/api/openai/image.go +++ b/api/openai/image.go @@ -13,12 +13,12 @@ import ( "strings" "time" - "github.com/go-skynet/LocalAI/api/schema" + "github.com/go-skynet/LocalAI/core/schema" "github.com/google/uuid" - "github.com/go-skynet/LocalAI/api/backend" - config "github.com/go-skynet/LocalAI/api/config" - "github.com/go-skynet/LocalAI/api/options" + "github.com/go-skynet/LocalAI/core/backend" + config "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/options" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" diff --git a/api/openai/inference.go b/api/openai/inference.go index 816c960c..184688b2 100644 --- a/api/openai/inference.go +++ b/api/openai/inference.go @@ -1,10 +1,10 @@ package openai import ( - "github.com/go-skynet/LocalAI/api/backend" - config "github.com/go-skynet/LocalAI/api/config" - "github.com/go-skynet/LocalAI/api/options" - "github.com/go-skynet/LocalAI/api/schema" + "github.com/go-skynet/LocalAI/core/backend" + config "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/options" + "github.com/go-skynet/LocalAI/core/schema" model "github.com/go-skynet/LocalAI/pkg/model" ) diff --git a/api/openai/list.go b/api/openai/list.go index 8bc5bbe2..614d5c80 100644 --- a/api/openai/list.go +++ b/api/openai/list.go @@ -3,8 +3,8 @@ package openai import ( "regexp" - config "github.com/go-skynet/LocalAI/api/config" - "github.com/go-skynet/LocalAI/api/schema" + config "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" ) diff --git a/api/openai/request.go b/api/openai/request.go index 6a7a14e8..83c41d97 100644 --- a/api/openai/request.go +++ b/api/openai/request.go @@ -9,10 +9,10 @@ import ( "net/http" "strings" - config "github.com/go-skynet/LocalAI/api/config" fiberContext "github.com/go-skynet/LocalAI/api/ctx" - options "github.com/go-skynet/LocalAI/api/options" - "github.com/go-skynet/LocalAI/api/schema" + config "github.com/go-skynet/LocalAI/core/config" + options "github.com/go-skynet/LocalAI/core/options" + "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/grammar" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" diff --git a/api/openai/transcription.go b/api/openai/transcription.go index 668a2069..c3fd7d5c 100644 --- a/api/openai/transcription.go +++ b/api/openai/transcription.go @@ -8,9 +8,9 @@ import ( "path" "path/filepath" - "github.com/go-skynet/LocalAI/api/backend" - config "github.com/go-skynet/LocalAI/api/config" - "github.com/go-skynet/LocalAI/api/options" + "github.com/go-skynet/LocalAI/core/backend" + config 
"github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/options" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" diff --git a/backend/go/transcribe/transcript.go b/backend/go/transcribe/transcript.go index ebd43eca..dc331cae 100644 --- a/backend/go/transcribe/transcript.go +++ b/backend/go/transcribe/transcript.go @@ -8,7 +8,7 @@ import ( "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" "github.com/go-audio/wav" - "github.com/go-skynet/LocalAI/api/schema" + "github.com/go-skynet/LocalAI/core/schema" ) func sh(c string) (string, error) { diff --git a/backend/go/transcribe/whisper.go b/backend/go/transcribe/whisper.go index a033afb0..ac93be01 100644 --- a/backend/go/transcribe/whisper.go +++ b/backend/go/transcribe/whisper.go @@ -4,7 +4,7 @@ package main // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) import ( "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" - "github.com/go-skynet/LocalAI/api/schema" + "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/grpc/base" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" ) diff --git a/api/backend/embeddings.go b/core/backend/embeddings.go similarity index 95% rename from api/backend/embeddings.go rename to core/backend/embeddings.go index 0cf15fea..d8b89e12 100644 --- a/api/backend/embeddings.go +++ b/core/backend/embeddings.go @@ -3,8 +3,8 @@ package backend import ( "fmt" - config "github.com/go-skynet/LocalAI/api/config" - "github.com/go-skynet/LocalAI/api/options" + config "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/options" "github.com/go-skynet/LocalAI/pkg/grpc" model "github.com/go-skynet/LocalAI/pkg/model" ) diff --git a/api/backend/image.go b/core/backend/image.go similarity index 94% rename from api/backend/image.go rename to core/backend/image.go index 6183269f..12ea57ce 100644 --- a/api/backend/image.go +++ b/core/backend/image.go @@ -1,8 +1,8 @@ package backend import ( - config "github.com/go-skynet/LocalAI/api/config" - "github.com/go-skynet/LocalAI/api/options" + config "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/options" "github.com/go-skynet/LocalAI/pkg/grpc/proto" model "github.com/go-skynet/LocalAI/pkg/model" ) diff --git a/api/backend/llm.go b/core/backend/llm.go similarity index 97% rename from api/backend/llm.go rename to core/backend/llm.go index 9e202c53..d1081ad6 100644 --- a/api/backend/llm.go +++ b/core/backend/llm.go @@ -8,8 +8,8 @@ import ( "sync" "unicode/utf8" - config "github.com/go-skynet/LocalAI/api/config" - "github.com/go-skynet/LocalAI/api/options" + config "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/options" "github.com/go-skynet/LocalAI/pkg/gallery" "github.com/go-skynet/LocalAI/pkg/grpc" model "github.com/go-skynet/LocalAI/pkg/model" diff --git a/api/backend/options.go b/core/backend/options.go similarity index 97% rename from api/backend/options.go rename to core/backend/options.go index 38f56068..9710ac17 100644 --- a/api/backend/options.go +++ b/core/backend/options.go @@ -7,8 +7,8 @@ import ( pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" model "github.com/go-skynet/LocalAI/pkg/model" - config "github.com/go-skynet/LocalAI/api/config" - "github.com/go-skynet/LocalAI/api/options" + config "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/options" ) func modelOpts(c config.Config, o *options.Option, opts []model.Option) 
[]model.Option { diff --git a/api/backend/transcript.go b/core/backend/transcript.go similarity index 86% rename from api/backend/transcript.go rename to core/backend/transcript.go index 77427839..1cbaf820 100644 --- a/api/backend/transcript.go +++ b/core/backend/transcript.go @@ -4,10 +4,10 @@ import ( "context" "fmt" - config "github.com/go-skynet/LocalAI/api/config" - "github.com/go-skynet/LocalAI/api/schema" + config "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/api/options" + "github.com/go-skynet/LocalAI/core/options" "github.com/go-skynet/LocalAI/pkg/grpc/proto" model "github.com/go-skynet/LocalAI/pkg/model" ) diff --git a/api/backend/tts.go b/core/backend/tts.go similarity index 90% rename from api/backend/tts.go rename to core/backend/tts.go index 6e5ffcc0..a9d7153f 100644 --- a/api/backend/tts.go +++ b/core/backend/tts.go @@ -6,9 +6,8 @@ import ( "os" "path/filepath" - api_config "github.com/go-skynet/LocalAI/api/config" - config "github.com/go-skynet/LocalAI/api/config" - "github.com/go-skynet/LocalAI/api/options" + config "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/options" "github.com/go-skynet/LocalAI/pkg/grpc/proto" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/utils" @@ -38,7 +37,7 @@ func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *opt grpcOpts := gRPCModelOpts(c) - opts := modelOpts(api_config.Config{}, o, []model.Option{ + opts := modelOpts(config.Config{}, o, []model.Option{ model.WithBackendString(bb), model.WithModel(modelFile), model.WithContext(o.Context), diff --git a/api/config/config.go b/core/config/config.go similarity index 99% rename from api/config/config.go rename to core/config/config.go index 5ea16828..af203ecc 100644 --- a/api/config/config.go +++ b/core/config/config.go @@ -1,4 +1,4 @@ -package api_config +package config import ( "errors" diff --git a/api/config/config_test.go b/core/config/config_test.go similarity index 93% rename from api/config/config_test.go rename to core/config/config_test.go index 4b00d587..d1e92d5c 100644 --- a/api/config/config_test.go +++ b/core/config/config_test.go @@ -1,10 +1,10 @@ -package api_config_test +package config_test import ( "os" - . "github.com/go-skynet/LocalAI/api/config" - "github.com/go-skynet/LocalAI/api/options" + . "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/options" "github.com/go-skynet/LocalAI/pkg/model" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" diff --git a/api/config/prediction.go b/core/config/prediction.go similarity index 99% rename from api/config/prediction.go rename to core/config/prediction.go index d2fbb1fa..dccb4dfb 100644 --- a/api/config/prediction.go +++ b/core/config/prediction.go @@ -1,4 +1,4 @@ -package api_config +package config type PredictionOptions struct { diff --git a/api/api.go b/core/http/api.go similarity index 98% rename from api/api.go rename to core/http/api.go index 4442421e..7d228152 100644 --- a/api/api.go +++ b/core/http/api.go @@ -1,4 +1,4 @@ -package api +package http import ( "encoding/json" @@ -7,11 +7,11 @@ import ( "os" "strings" - config "github.com/go-skynet/LocalAI/api/config" "github.com/go-skynet/LocalAI/api/localai" "github.com/go-skynet/LocalAI/api/openai" - "github.com/go-skynet/LocalAI/api/options" - "github.com/go-skynet/LocalAI/api/schema" + config "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/options" + "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/internal" "github.com/go-skynet/LocalAI/metrics" "github.com/go-skynet/LocalAI/pkg/assets" diff --git a/api/api_test.go b/core/http/api_test.go similarity index 99% rename from api/api_test.go rename to core/http/api_test.go index 04d2d6fe..9068b393 100644 --- a/api/api_test.go +++ b/core/http/api_test.go @@ -1,4 +1,4 @@ -package api_test +package http_test import ( "bytes" @@ -13,8 +13,8 @@ import ( "path/filepath" "runtime" - . "github.com/go-skynet/LocalAI/api" - "github.com/go-skynet/LocalAI/api/options" + . "github.com/go-skynet/LocalAI/core/http" + "github.com/go-skynet/LocalAI/core/options" "github.com/go-skynet/LocalAI/metrics" "github.com/go-skynet/LocalAI/pkg/downloader" "github.com/go-skynet/LocalAI/pkg/gallery" diff --git a/api/apt_suite_test.go b/core/http/apt_suite_test.go similarity index 90% rename from api/apt_suite_test.go rename to core/http/apt_suite_test.go index e3c15c04..0269a973 100644 --- a/api/apt_suite_test.go +++ b/core/http/apt_suite_test.go @@ -1,4 +1,4 @@ -package api_test +package http_test import ( "testing" diff --git a/api/options/options.go b/core/options/options.go similarity index 100% rename from api/options/options.go rename to core/options/options.go diff --git a/api/schema/openai.go b/core/schema/openai.go similarity index 98% rename from api/schema/openai.go rename to core/schema/openai.go index 12a39b42..23abd7b7 100644 --- a/api/schema/openai.go +++ b/core/schema/openai.go @@ -3,7 +3,7 @@ package schema import ( "context" - config "github.com/go-skynet/LocalAI/api/config" + config "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/pkg/grammar" ) diff --git a/api/schema/whisper.go b/core/schema/whisper.go similarity index 100% rename from api/schema/whisper.go rename to core/schema/whisper.go diff --git a/main.go b/main.go index 2636b402..7e4262ee 100644 --- a/main.go +++ b/main.go @@ -12,10 +12,10 @@ import ( "syscall" "time" - api "github.com/go-skynet/LocalAI/api" - "github.com/go-skynet/LocalAI/api/backend" - config "github.com/go-skynet/LocalAI/api/config" - "github.com/go-skynet/LocalAI/api/options" + "github.com/go-skynet/LocalAI/core/backend" + config "github.com/go-skynet/LocalAI/core/config" + api "github.com/go-skynet/LocalAI/core/http" + "github.com/go-skynet/LocalAI/core/options" "github.com/go-skynet/LocalAI/internal" "github.com/go-skynet/LocalAI/metrics" "github.com/go-skynet/LocalAI/pkg/gallery" diff --git a/pkg/grpc/backend.go b/pkg/grpc/backend.go index 
ae8ffc5f..22933d58 100644 --- a/pkg/grpc/backend.go +++ b/pkg/grpc/backend.go @@ -2,7 +2,8 @@ package grpc import ( "context" - "github.com/go-skynet/LocalAI/api/schema" + + "github.com/go-skynet/LocalAI/core/schema" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" "google.golang.org/grpc" ) diff --git a/pkg/grpc/base/base.go b/pkg/grpc/base/base.go index 739d1cbb..89c8785e 100644 --- a/pkg/grpc/base/base.go +++ b/pkg/grpc/base/base.go @@ -6,7 +6,7 @@ import ( "fmt" "os" - "github.com/go-skynet/LocalAI/api/schema" + "github.com/go-skynet/LocalAI/core/schema" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" gopsutil "github.com/shirou/gopsutil/v3/process" ) diff --git a/pkg/grpc/client.go b/pkg/grpc/client.go index 5e97ea73..9058db05 100644 --- a/pkg/grpc/client.go +++ b/pkg/grpc/client.go @@ -7,7 +7,7 @@ import ( "sync" "time" - "github.com/go-skynet/LocalAI/api/schema" + "github.com/go-skynet/LocalAI/core/schema" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" diff --git a/pkg/grpc/embed.go b/pkg/grpc/embed.go index b9ab551f..228b1df5 100644 --- a/pkg/grpc/embed.go +++ b/pkg/grpc/embed.go @@ -2,11 +2,12 @@ package grpc import ( "context" - "github.com/go-skynet/LocalAI/api/schema" + "time" + + "github.com/go-skynet/LocalAI/core/schema" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" "google.golang.org/grpc" "google.golang.org/grpc/metadata" - "time" ) var _ Backend = new(embedBackend) diff --git a/pkg/grpc/interface.go b/pkg/grpc/interface.go index a76261c1..1cc7cb3d 100644 --- a/pkg/grpc/interface.go +++ b/pkg/grpc/interface.go @@ -1,7 +1,7 @@ package grpc import ( - "github.com/go-skynet/LocalAI/api/schema" + "github.com/go-skynet/LocalAI/core/schema" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" ) diff --git a/tests/integration/reflect_test.go b/tests/integration/reflect_test.go index c0fe7096..bf3f8a5b 100644 --- a/tests/integration/reflect_test.go +++ b/tests/integration/reflect_test.go @@ -3,7 +3,7 @@ package integration_test import ( "reflect" - config "github.com/go-skynet/LocalAI/api/config" + config "github.com/go-skynet/LocalAI/core/config" model "github.com/go-skynet/LocalAI/pkg/model" . "github.com/onsi/ginkgo/v2" . 
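The hunks above (finishing just below) are one mechanical package move: everything that lived under api/backend, api/config, api/options and api/schema now lives under core/. The package clauses api_config and api become config and http, but exported identifiers are untouched, so for downstream code the migration is purely an import-path rewrite. A minimal sketch, assuming the LocalAI module is available in the build's module graph; the retired paths are kept as comments for contrast:

package main

import (
	// Pre-refactor paths, removed by this patch series:
	//   config "github.com/go-skynet/LocalAI/api/config"
	//   "github.com/go-skynet/LocalAI/api/options"
	config "github.com/go-skynet/LocalAI/core/config"
	"github.com/go-skynet/LocalAI/core/options"
)

func main() {
	var c config.Config
	var o options.Option
	_, _ = c, o // the types themselves are unchanged, only their import paths moved
}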
"github.com/onsi/gomega" From 54ec6348fadc2202d7af6f4ba3656a0adf06010c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 21 Feb 2024 11:35:44 +0100 Subject: [PATCH 0076/2895] deps(llama.cpp): update (#1714) Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- backend/cpp/llama/grpc-server.cpp | 152 +++++++++++++++++++++--------- 2 files changed, 106 insertions(+), 48 deletions(-) diff --git a/Makefile b/Makefile index 31434e50..0a5d030b 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=f026f8120f97090d34a52b3dc023c82e0ede3f7d +CPPLLAMA_VERSION?=9350a1cf21b1492c69b20175b73a419b897d6a3a # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 954e472a..89169eea 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -51,6 +51,7 @@ struct server_params std::string hostname = "127.0.0.1"; std::vector api_keys; std::string public_path = "examples/server/public"; + std::string chat_template = "chatml"; int32_t port = 8080; int32_t read_timeout = 600; int32_t write_timeout = 600; @@ -349,6 +350,7 @@ struct llama_server_context // slots / clients std::vector slots; + json default_generation_settings_for_props; llama_server_queue queue_tasks; llama_server_response queue_results; @@ -445,6 +447,9 @@ struct llama_server_context slots.push_back(slot); } + default_generation_settings_for_props = get_formated_generation(slots.front()); + default_generation_settings_for_props["seed"] = -1; + batch = llama_batch_init(n_ctx, 0, params.n_parallel); // empty system prompt @@ -527,27 +532,29 @@ struct llama_server_context slot_params default_params; llama_sampling_params default_sparams; - slot->params.stream = json_value(data, "stream", false); - slot->params.cache_prompt = json_value(data, "cache_prompt", false); - slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict); - slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k); - slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p); - slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p); - slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z); - slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p); - slot->sparams.temp = json_value(data, "temperature", default_sparams.temp); - slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n); - slot->sparams.penalty_repeat = json_value(data, "repeat_penalty", default_sparams.penalty_repeat); - slot->sparams.penalty_freq = json_value(data, "frequency_penalty", default_sparams.penalty_freq); - slot->sparams.penalty_present = json_value(data, "presence_penalty", default_sparams.penalty_present); - slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat); - slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau); - slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta); - slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl); - slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep); - slot->params.seed = json_value(data, "seed", default_params.seed); - slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar); - 
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs); + slot->params.stream = json_value(data, "stream", false); + slot->params.cache_prompt = json_value(data, "cache_prompt", false); + slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict); + slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k); + slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p); + slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p); + slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z); + slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p); + slot->sparams.temp = json_value(data, "temperature", default_sparams.temp); + slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range); + slot->sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent); + slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n); + slot->sparams.penalty_repeat = json_value(data, "repeat_penalty", default_sparams.penalty_repeat); + slot->sparams.penalty_freq = json_value(data, "frequency_penalty", default_sparams.penalty_freq); + slot->sparams.penalty_present = json_value(data, "presence_penalty", default_sparams.penalty_present); + slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat); + slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau); + slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta); + slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl); + slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep); + slot->params.seed = json_value(data, "seed", default_params.seed); + slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar); + slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs); // infill if (data.count("input_prefix") != 0) @@ -626,18 +633,36 @@ struct llama_server_context const int n_vocab = llama_n_vocab(model); for (const auto &el : *logit_bias) { - if (el.is_array() && el.size() == 2 && el[0].is_number_integer()) + if (el.is_array() && el.size() == 2) { - llama_token tok = el[0].get(); - if (tok >= 0 && tok < n_vocab) + float bias; + if (el[1].is_number()) { - if (el[1].is_number()) + bias = el[1].get(); + } + else if (el[1].is_boolean() && !el[1].get()) + { + bias = -INFINITY; + } + else + { + continue; + } + + if (el[0].is_number_integer()) + { + llama_token tok = el[0].get(); + if (tok >= 0 && tok < n_vocab) { - slot->sparams.logit_bias[tok] = el[1].get(); + slot->sparams.logit_bias[tok] = bias; } - else if (el[1].is_boolean() && !el[1].get()) + } + else if (el[0].is_string()) + { + auto toks = llama_tokenize(model, el[0].get(), false); + for (auto tok : toks) { - slot->sparams.logit_bias[tok] = -INFINITY; + slot->sparams.logit_bias[tok] = bias; } } } @@ -950,18 +975,31 @@ struct llama_server_context { continue; } - clip_image_f32 * img_res = clip_image_f32_init(); - if (!clip_image_preprocess(clp_ctx, img.img_data, img_res, /*pad2square =*/ true)) + clip_image_f32_batch img_res_v; + img_res_v.size = 0; + img_res_v.data = nullptr; + if (!clip_image_preprocess(clp_ctx, img.img_data, img_res_v)) { LOG_TEE("Error processing the given image"); clip_free(clp_ctx); + clip_image_f32_batch_free(img_res_v); return false; } + if (img_res_v.size == 0) + { + 
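The reworked logit_bias loop above broadens the accepted JSON shapes: an entry's second element may be a number (the bias) or false (ban the token outright via negative infinity), and its first element may be a token id or a string, in which case the string is tokenized and the bias applied to every resulting token. The same normalization sketched in Go, with the entry and tokenizer types assumed for illustration:

package main

import (
	"fmt"
	"math"
)

// applyLogitBias folds [id-or-string, number-or-false] pairs into a
// token-id -> bias map, mirroring the branching in the loop above.
func applyLogitBias(entries [][2]any, nVocab int, tokenize func(string) []int) map[int]float64 {
	biases := map[int]float64{}
	for _, el := range entries {
		var bias float64
		switch v := el[1].(type) {
		case float64:
			bias = v
		case bool:
			if v {
				continue // only false is meaningful: it bans the token
			}
			bias = math.Inf(-1)
		default:
			continue
		}
		switch k := el[0].(type) {
		case float64: // a raw token id
			if tok := int(k); tok >= 0 && tok < nVocab {
				biases[tok] = bias
			}
		case string: // tokenize, then bias every produced token
			for _, tok := range tokenize(k) {
				biases[tok] = bias
			}
		}
	}
	return biases
}

func main() {
	tokenize := func(string) []int { return []int{42, 43} } // stand-in tokenizer
	fmt.Println(applyLogitBias([][2]any{{"hello", false}}, 100, tokenize))
	// map[42:-Inf 43:-Inf]
}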
LOG_TEE("Error processing the given image"); + return false; + } + + // note: assumes only one image was returned by clip_image_preprocess + clip_image_f32 * img_res = img_res_v.data; + img.image_tokens = clip_n_patches(clp_ctx); img.image_embedding = (float *)malloc(clip_embd_nbytes(clp_ctx)); if (!img.image_embedding) { LOG_TEE("Unable to allocate memory for image embeddings\n"); + clip_image_f32_batch_free(img_res_v); clip_free(clp_ctx); return false; } @@ -969,9 +1007,12 @@ struct llama_server_context if (!clip_image_encode(clp_ctx, params.n_threads, img_res, img.image_embedding)) { LOG_TEE("Unable to encode image\n"); + clip_image_f32_batch_free(img_res_v); return false; } - clip_image_f32_free(img_res); + + clip_image_f32_batch_free(img_res_v); + img.request_encode_image = false; } @@ -990,11 +1031,6 @@ struct llama_server_context queue_results.send(res); } - json get_model_props() - { - return get_formated_generation(slots[0]); - } - json get_formated_generation(llama_client_slot &slot) { const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model)); @@ -1005,6 +1041,8 @@ struct llama_server_context {"model", params.model_alias}, {"seed", slot.params.seed}, {"temperature", slot.sparams.temp}, + {"dynatemp_range", slot.sparams.dynatemp_range}, + {"dynatemp_exponent", slot.sparams.dynatemp_exponent}, {"top_k", slot.sparams.top_k}, {"top_p", slot.sparams.top_p}, {"min_p", slot.sparams.min_p}, @@ -1166,13 +1204,30 @@ struct llama_server_context task.multitask_id = multitask_id; // when a completion task's prompt array is not a singleton, we split it into multiple requests - if (task.data.count("prompt") && task.data.at("prompt").size() > 1) - { - split_multiprompt_task(task_id, task); - } - // otherwise, it's a single-prompt task, we actually queue it - queue_tasks.post(task); + // if there's numbers in the prompt array it will be treated as an array of tokens + if (task.data.count("prompt") != 0 && task.data.at("prompt").size() > 1) { + bool numbers = false; + for (const auto& e : task.data.at("prompt")) { + if (e.is_number()) { + numbers = true; + break; + } + } + + // NOTE: split_multiprompt_task() does not handle a mix of strings and numbers, + // it will completely stall the server. I don't know where the bug for this is. + // + // if there are numbers, it needs to be treated like a single prompt, + // queue_tasks handles a mix of strings and numbers just fine. 
+ if (numbers) { + queue_tasks.post(task); + } else { + split_multiprompt_task(task_id, task); + } + } else { + queue_tasks.post(task); + } } // for multiple images processing @@ -1254,7 +1309,10 @@ struct llama_server_context void split_multiprompt_task(int multitask_id, task_server& multiprompt_task) { int prompt_count = multiprompt_task.data.at("prompt").size(); - assert(prompt_count > 1); + if (prompt_count <= 1) { + send_error(multiprompt_task, "error while handling multiple prompts"); + return; + } // generate all the ID for subtask std::vector subtask_ids(prompt_count); @@ -1566,10 +1624,6 @@ struct llama_server_context LOG_TEE("slot %d : in cache: %i tokens | to process: %i tokens\n", slot.id, slot.n_past, slot.num_prompt_tokens_processed); } - LOG_TEE("slot %d : kv cache rm - [%d, end)\n", slot.id, (int) system_tokens.size() + slot.n_past); - - llama_kv_cache_seq_rm(ctx, slot.id, system_tokens.size() + slot.n_past, -1); - slot.cache_tokens = prompt_tokens; if (slot.n_past == slot.num_prompt_tokens && slot.n_past > 0) @@ -1583,6 +1637,10 @@ struct llama_server_context } } + LOG_TEE("slot %d : kv cache rm - [%d, end)\n", slot.id, (int) system_tokens.size() + slot.n_past); + + llama_kv_cache_seq_rm(ctx, slot.id, system_tokens.size() + slot.n_past, -1); + LOG_VERBOSE("prompt ingested", { {"n_past", slot.n_past}, {"cached", tokens_to_str(ctx, slot.cache_tokens.cbegin(), slot.cache_tokens.cbegin() + slot.n_past)}, From 8292781045a968fe7c653a385a7a30e4611c3cf3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 21 Feb 2024 17:23:38 +0100 Subject: [PATCH 0077/2895] deps(llama.cpp): update, support Gemma models (#1734) deps(llama.cpp): update Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- backend/cpp/llama/CMakeLists.txt | 10 ++- backend/cpp/llama/Makefile | 3 + backend/cpp/llama/grpc-server.cpp | 137 ++++++++++++++++-------------- 4 files changed, 85 insertions(+), 67 deletions(-) diff --git a/Makefile b/Makefile index 0a5d030b..8da648dd 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=9350a1cf21b1492c69b20175b73a419b897d6a3a +CPPLLAMA_VERSION?=88c46cbdac05cebd936511b1d3c74112e721615f # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all diff --git a/backend/cpp/llama/CMakeLists.txt b/backend/cpp/llama/CMakeLists.txt index 8299705a..031e4964 100644 --- a/backend/cpp/llama/CMakeLists.txt +++ b/backend/cpp/llama/CMakeLists.txt @@ -2,16 +2,20 @@ ## XXX: In some versions of CMake clip wasn't being built before llama. ## This is an hack for now, but it should be fixed in the future. set(TARGET myclip) -add_library(${TARGET} clip.cpp clip.h) +add_library(${TARGET} clip.cpp clip.h llava.cpp llava.h) install(TARGETS ${TARGET} LIBRARY) -target_link_libraries(${TARGET} PRIVATE common ggml ${CMAKE_THREAD_LIBS_INIT}) +target_include_directories(myclip PUBLIC .) +target_include_directories(myclip PUBLIC ../..) 
+target_include_directories(myclip PUBLIC ../../common) +target_link_libraries(${TARGET} PRIVATE common ggml llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) if (NOT MSVC) target_compile_options(${TARGET} PRIVATE -Wno-cast-qual) # stb_image.h endif() +# END CLIP hack + set(TARGET grpc-server) -# END CLIP hack set(CMAKE_CXX_STANDARD 17) cmake_minimum_required(VERSION 3.15) set(TARGET grpc-server) diff --git a/backend/cpp/llama/Makefile b/backend/cpp/llama/Makefile index b050b620..d6d8ae90 100644 --- a/backend/cpp/llama/Makefile +++ b/backend/cpp/llama/Makefile @@ -45,6 +45,9 @@ llama.cpp/examples/grpc-server: ## XXX: In some versions of CMake clip wasn't being built before llama. ## This is an hack for now, but it should be fixed in the future. cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h + cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp + echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h + cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp rebuild: diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 89169eea..0066c16d 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -11,7 +11,8 @@ #include #include #include -#include "../llava/clip.h" +#include "clip.h" +#include "llava.h" #include "stb_image.h" #include "common.h" #include "json.hpp" @@ -32,6 +33,7 @@ #include #include #include +#include using grpc::Server; using grpc::ServerBuilder; @@ -51,10 +53,11 @@ struct server_params std::string hostname = "127.0.0.1"; std::vector api_keys; std::string public_path = "examples/server/public"; - std::string chat_template = "chatml"; + std::string chat_template = ""; int32_t port = 8080; int32_t read_timeout = 600; int32_t write_timeout = 600; + bool slots_endpoint = true; }; bool server_verbose = false; @@ -173,6 +176,7 @@ struct llama_client_slot int32_t n_decoded = 0; int32_t n_remaining = -1; int32_t i_batch = -1; + int32_t n_predict = -1; int32_t num_prompt_tokens = 0; int32_t num_prompt_tokens_processed = 0; @@ -424,6 +428,7 @@ struct llama_server_context slot.id = i; slot.n_ctx = n_ctx_slot; + slot.n_predict = params.n_predict; LOG_TEE(" -> Slot %i - max context: %i\n", slot.id, n_ctx_slot); @@ -451,10 +456,6 @@ struct llama_server_context default_generation_settings_for_props["seed"] = -1; batch = llama_batch_init(n_ctx, 0, params.n_parallel); - - // empty system prompt - system_prompt = ""; - system_tokens.clear(); } std::vector tokenize(const json & json_prompt, bool add_bos) const @@ -531,7 +532,7 @@ struct llama_server_context bool launch_slot_with_data(llama_client_slot* &slot, json data) { slot_params default_params; llama_sampling_params default_sparams; - + slot->params.stream = json_value(data, "stream", false); slot->params.cache_prompt = json_value(data, "cache_prompt", false); slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict); @@ -555,6 +556,16 @@ struct llama_server_context slot->params.seed = json_value(data, "seed", default_params.seed); slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar); slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs); + slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep); + + if (slot->n_predict > 0 && slot->params.n_predict > 
slot->n_predict) { + // Might be better to reject the request with a 400 ? + LOG_WARNING("Max tokens to predict exceeds server configuration", { + {"params.n_predict", slot->params.n_predict}, + {"slot.n_predict", slot->n_predict}, + }); + slot->params.n_predict = slot->n_predict; + } // infill if (data.count("input_prefix") != 0) @@ -683,6 +694,24 @@ struct llama_server_context } } + const auto &samplers_sequence = data.find("samplers"); + if (samplers_sequence != data.end() && samplers_sequence->is_array()) + { + std::vector sampler_names; + for (const auto &sampler_name : *samplers_sequence) + { + if (sampler_name.is_string()) + { + sampler_names.emplace_back(sampler_name); + } + } + slot->sparams.samplers_sequence = sampler_types_from_names(sampler_names, false); + } + else + { + slot->sparams.samplers_sequence = default_sparams.samplers_sequence; + } + if (multimodal) { const auto &images_data = data.find("image_data"); @@ -772,27 +801,30 @@ struct llama_server_context } void update_system_prompt() { - system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token); - - llama_batch_clear(batch); - kv_cache_clear(); + system_tokens.clear(); - for (int i = 0; i < (int) system_tokens.size(); ++i) - { - llama_batch_add(batch, system_tokens[i], i, { 0 }, false); - } + if (!system_prompt.empty()) { + system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token); - if (llama_decode(ctx, batch) != 0) - { - LOG_TEE("%s: llama_decode() failed\n", __func__); - return; - } + llama_batch_clear(batch); - // assign the system KV cache to all parallel sequences - for (int32_t i = 1; i < params.n_parallel; ++i) - { - llama_kv_cache_seq_cp(ctx, 0, i, 0, system_tokens.size()); + for (int i = 0; i < (int)system_tokens.size(); ++i) + { + llama_batch_add(batch, system_tokens[i], i, { 0 }, false); + } + + if (llama_decode(ctx, batch) != 0) + { + LOG_TEE("%s: llama_decode() failed\n", __func__); + return; + } + + // assign the system KV cache to all parallel sequences + for (int32_t i = 1; i < params.n_parallel; ++i) + { + llama_kv_cache_seq_cp(ctx, 0, i, 0, system_tokens.size()); + } } LOG_TEE("system prompt updated\n"); @@ -814,10 +846,8 @@ struct llama_server_context name_user = sys_props.value("anti_prompt", ""); name_assistant = sys_props.value("assistant_name", ""); - if (slots.size() > 0) - { - notify_system_prompt_changed(); - } + + notify_system_prompt_changed(); } static size_t find_stopping_strings(const std::string &text, const size_t last_token_size, @@ -975,44 +1005,12 @@ struct llama_server_context { continue; } - clip_image_f32_batch img_res_v; - img_res_v.size = 0; - img_res_v.data = nullptr; - if (!clip_image_preprocess(clp_ctx, img.img_data, img_res_v)) - { - LOG_TEE("Error processing the given image"); - clip_free(clp_ctx); - clip_image_f32_batch_free(img_res_v); - return false; - } - if (img_res_v.size == 0) - { + + if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) { LOG_TEE("Error processing the given image"); return false; } - // note: assumes only one image was returned by clip_image_preprocess - clip_image_f32 * img_res = img_res_v.data; - - img.image_tokens = clip_n_patches(clp_ctx); - img.image_embedding = (float *)malloc(clip_embd_nbytes(clp_ctx)); - if (!img.image_embedding) - { - LOG_TEE("Unable to allocate memory for image embeddings\n"); - clip_image_f32_batch_free(img_res_v); - clip_free(clp_ctx); - return false; - } - LOG_TEE("slot %i - encoding image [id: %i]\n", slot.id, img.id); - if 
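Two request-validation behaviors appear in the hunks above: a per-request n_predict larger than the slot's server-side limit is clamped with a warning rather than rejected, and an optional "samplers" array of names is parsed into the slot's sampler sequence. The clamp in isolation, as a Go sketch with assumed names:

package main

import "fmt"

// clampNPredict caps the caller-requested token budget at the limit the
// server was configured with; a limit <= 0 means no server-side cap.
func clampNPredict(requested, slotLimit int) int {
	if slotLimit > 0 && requested > slotLimit {
		// the patch logs a warning here and notes that a 400 might be better
		return slotLimit
	}
	return requested
}

func main() {
	fmt.Println(clampNPredict(4096, 512)) // 512: request exceeds the slot limit
	fmt.Println(clampNPredict(128, 512))  // 128: within budget, passed through
}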
(!clip_image_encode(clp_ctx, params.n_threads, img_res, img.image_embedding)) - { - LOG_TEE("Unable to encode image\n"); - clip_image_f32_batch_free(img_res_v); - return false; - } - - clip_image_f32_batch_free(img_res_v); - img.request_encode_image = false; } @@ -1036,8 +1034,15 @@ struct llama_server_context const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model)); const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() && eos_bias->second < 0.0f && std::isinf(eos_bias->second); + std::vector samplers_sequence; + for (const auto &sampler_type : slot.sparams.samplers_sequence) + { + samplers_sequence.emplace_back(sampler_type_to_name_string(sampler_type)); + } + return json { {"n_ctx", slot.n_ctx}, + {"n_predict", slot.n_predict}, {"model", params.model_alias}, {"seed", slot.params.seed}, {"temperature", slot.sparams.temp}, @@ -1065,7 +1070,9 @@ struct llama_server_context {"stream", slot.params.stream}, {"logit_bias", slot.sparams.logit_bias}, {"n_probs", slot.sparams.n_probs}, + {"min_keep", slot.sparams.min_keep}, {"grammar", slot.sparams.grammar}, + {"samplers", samplers_sequence} }; } @@ -1877,6 +1884,9 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con } } +std::function shutdown_handler; +inline void signal_handler(int signal) { shutdown_handler(signal); } + ///////////////////////////////// //////////////////////////////// //////// LOCALAI code starts below here @@ -2147,7 +2157,8 @@ public: gpt_params params; params_parse(request, params); - llama_backend_init(params.numa); + llama_backend_init(); + llama_numa_init(params.numa); // load the model if (!llama.load_model(params)) From ad3623dd8d3fe4e5fcdd214866896b7d8c304345 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 21 Feb 2024 23:17:51 +0100 Subject: [PATCH 0078/2895] examples(phi-2): strip newline at the end of the prompt template Signed-off-by: Ettore Di Giacinto --- examples/configurations/phi-2.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/configurations/phi-2.yaml b/examples/configurations/phi-2.yaml index 8f193866..cac1e9da 100644 --- a/examples/configurations/phi-2.yaml +++ b/examples/configurations/phi-2.yaml @@ -12,7 +12,7 @@ parameters: top_p: 0.95 seed: -1 template: - chat: &template | + chat: &template |- Instruct: {{.Input}} Output: completion: *template From ba85d0bcad07b2a455febcc53c3537bb9b1485ea Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 21 Feb 2024 23:40:25 +0100 Subject: [PATCH 0079/2895] feat(upload-api): do not display error if uploadedFiles.json is not present Signed-off-by: Ettore Di Giacinto --- api/openai/files.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/api/openai/files.go b/api/openai/files.go index 57f5c48d..140b4151 100644 --- a/api/openai/files.go +++ b/api/openai/files.go @@ -17,6 +17,8 @@ import ( var uploadedFiles []File +const uploadedFilesFile = "uploadedFiles.json" + // File represents the structure of a file object from the OpenAI API. 
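The phi-2 template fix above is a single character with a real effect on prompting: in YAML, a "|" block scalar keeps its trailing newline while "|-" strips it, so after the change the rendered chat template no longer ends in a newline after "Output:". A small demonstration with the two rendered strings spelled out by hand (illustrative values):

package main

import "fmt"

func main() {
	clip := "Instruct: {{.Input}}\nOutput:\n" // what the "|" block produced
	strip := "Instruct: {{.Input}}\nOutput:"  // what the "|-" block produces
	fmt.Printf("%q ends with newline: %v\n", clip, clip[len(clip)-1] == '\n')
	fmt.Printf("%q ends with newline: %v\n", strip, strip[len(strip)-1] == '\n')
}

Since the completion template aliases the same anchor (*template), both prompts lose the newline, and for completion-style models a stray newline after "Output:" can bias the first generated tokens, which is presumably why the template strips it.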
type File struct { ID string `json:"id"` // Unique identifier for the file @@ -33,14 +35,22 @@ func saveUploadConfig(uploadDir string) { log.Error().Msgf("Failed to JSON marshal the uploadedFiles: %s", err) } - err = os.WriteFile(filepath.Join(uploadDir, "uploadedFiles.json"), file, 0644) + err = os.WriteFile(filepath.Join(uploadDir, uploadedFilesFile), file, 0644) if err != nil { log.Error().Msgf("Failed to save uploadedFiles to file: %s", err) } } func LoadUploadConfig(uploadPath string) { - file, err := os.ReadFile(filepath.Join(uploadPath, "uploadedFiles.json")) + uploadFilePath := filepath.Join(uploadPath, uploadedFilesFile) + + _, err := os.Stat(uploadFilePath) + if os.IsNotExist(err) { + log.Debug().Msgf("No uploadedFiles file found at %s", uploadFilePath) + return + } + + file, err := os.ReadFile(uploadFilePath) if err != nil { log.Error().Msgf("Failed to read file: %s", err) } else { From feba38be36b0d4bc573c9acfb4d55fbfc27aec31 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 22 Feb 2024 00:15:01 +0100 Subject: [PATCH 0080/2895] examples(mistral-openorca): add stopword Signed-off-by: Ettore Di Giacinto --- embedded/models/mistral-openorca.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/embedded/models/mistral-openorca.yaml b/embedded/models/mistral-openorca.yaml index fbab4e39..f40d854f 100644 --- a/embedded/models/mistral-openorca.yaml +++ b/embedded/models/mistral-openorca.yaml @@ -11,20 +11,18 @@ template: <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} {{if .Content}}{{.Content}}{{end}} <|im_end|> - chat: | {{.Input}} <|im_start|>assistant - completion: | {{.Input}} context_size: 4096 f16: true stopwords: - <|im_end|> - +- usage: | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ "model": "mistral-openorca", "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] - }' \ No newline at end of file + }' From 6fc122fa1a91248905f4ab8bf39fe1f56b9dec1b Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 22 Feb 2024 10:33:23 +0100 Subject: [PATCH 0081/2895] :arrow_up: Update ggerganov/llama.cpp (#1705) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8da648dd..d34bac30 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=88c46cbdac05cebd936511b1d3c74112e721615f +CPPLLAMA_VERSION?=89febfed9322c8849520dc63c93ee4f5fd72556e # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From cbed6ab1bb7b0b779bf7df49f62fb03928a507a8 Mon Sep 17 00:00:00 2001 From: Luna Midori <118759930+lunamidori5@users.noreply.github.com> Date: Thu, 22 Feb 2024 07:35:06 -0800 Subject: [PATCH 0082/2895] Update README.md (#1739) * Update README.md Signed-off-by: Luna Midori <118759930+lunamidori5@users.noreply.github.com> * Update README.md Signed-off-by: Luna Midori <118759930+lunamidori5@users.noreply.github.com> --------- Signed-off-by: Luna Midori <118759930+lunamidori5@users.noreply.github.com> --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 46068031..2ae95d8c 100644 --- a/README.md +++ b/README.md @@ -98,9 +98,8 @@ WebUIs: Model galleries - 
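The LoadUploadConfig change above turns a missing uploadedFiles.json from a logged error into a quiet no-op by checking for the file before reading it, which matters on first run when nothing has been uploaded yet. The pattern in isolation, as a sketch with an assumed helper name:

package main

import (
	"encoding/json"
	"fmt"
	"os"
)

// loadOptionalJSON fills out from a JSON file when it exists; an absent
// file is a normal first-run condition, not an error.
func loadOptionalJSON(path string, out any) error {
	if _, err := os.Stat(path); os.IsNotExist(err) {
		return nil // nothing persisted yet
	}
	b, err := os.ReadFile(path)
	if err != nil {
		return err
	}
	return json.Unmarshal(b, out)
}

func main() {
	var files []map[string]any
	if err := loadOptionalJSON("uploadedFiles.json", &files); err != nil {
		fmt.Println("load:", err)
	}
	fmt.Println("loaded", len(files), "file records")
}

The Stat-then-Read sequence is not atomic, but as in the original, a file created in between is simply picked up on the next load.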
https://github.com/go-skynet/model-gallery -Auto Docker / Model setup -- https://io.midori-ai.xyz/howtos/easy-localai-installer/ -- https://io.midori-ai.xyz/howtos/easy-model-installer/ +UI / Management Programs +- [LocalAI Manager](https://io.midori-ai.xyz/howtos/easy-model-installer/) Other: - Helm chart https://github.com/go-skynet/helm-charts From d825821a22d3e624a20d17bd00541b9514e2614a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 23 Feb 2024 00:07:15 +0100 Subject: [PATCH 0083/2895] :arrow_up: Update ggerganov/llama.cpp (#1740) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d34bac30..d41da5f4 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=89febfed9322c8849520dc63c93ee4f5fd72556e +CPPLLAMA_VERSION?=201294ae177b308fb3a99dc504dd6d27e8afa907 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From ff88c390bb51d9567572815a63c575eb2e3dd062 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 24 Feb 2024 00:06:46 +0100 Subject: [PATCH 0084/2895] :arrow_up: Update ggerganov/llama.cpp (#1750) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d41da5f4..71ca6fcf 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=201294ae177b308fb3a99dc504dd6d27e8afa907 +CPPLLAMA_VERSION?=fd43d66f46ee3b5345fb8a74a252d86ccd34a409 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 0135e1e3b90d361b337f84eb76ee9d784ea20c40 Mon Sep 17 00:00:00 2001 From: Ludovic Leroux Date: Sat, 24 Feb 2024 05:48:45 -0500 Subject: [PATCH 0085/2895] fix: vllm - use AsyncLLMEngine to allow true streaming mode (#1749) * fix: use vllm AsyncLLMEngine to bring true stream The current vLLM implementation uses the LLMEngine, which was designed for offline batch inference, which results in the streaming mode outputting all blobs at once at the end of the inference. This PR reworks the gRPC server to use asyncio and gRPC.aio, in combination with vLLM's AsyncLLMEngine to bring true stream mode. This PR also passes more parameters to vLLM during inference (presence_penalty, frequency_penalty, stop, ignore_eos, seed, ...).
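One detail worth calling out before the diff below: AsyncLLMEngine yields the cumulative text generated so far on every iteration, so the server has to subtract what it has already sent and forward only the new suffix. That delta step, sketched in Go, where strings.TrimPrefix plays the role of Python's removeprefix:

package main

import (
	"fmt"
	"strings"
)

// deltaText returns the portion of the cumulative generation that has not
// been sent to the client yet.
func deltaText(cumulative, alreadySent string) string {
	return strings.TrimPrefix(cumulative, alreadySent)
}

func main() {
	sent := ""
	for _, snapshot := range []string{"Hel", "Hello", "Hello, world"} {
		fmt.Printf("send %q\n", deltaText(snapshot, sent))
		sent = snapshot
	}
	// send "Hel", then "lo", then ", world"
}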
* Remove unused import --- backend/python/vllm/backend_vllm.py | 138 +++++++++++++++++++--------- 1 file changed, 93 insertions(+), 45 deletions(-) diff --git a/backend/python/vllm/backend_vllm.py b/backend/python/vllm/backend_vllm.py index d5b8b51f..8f8c4ee0 100644 --- a/backend/python/vllm/backend_vllm.py +++ b/backend/python/vllm/backend_vllm.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 +import asyncio from concurrent import futures -import time import argparse import signal import sys @@ -10,7 +10,10 @@ import backend_pb2 import backend_pb2_grpc import grpc -from vllm import LLM, SamplingParams +from vllm.engine.arg_utils import AsyncEngineArgs +from vllm.engine.async_llm_engine import AsyncLLMEngine +from vllm.sampling_params import SamplingParams +from vllm.utils import random_uuid _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -79,16 +82,20 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): Returns: backend_pb2.Result: The load model result. """ + engine_args = AsyncEngineArgs( + model=request.Model, + ) + + if request.Quantization != "": + engine_args.quantization = request.Quantization + try: - if request.Quantization != "": - self.llm = LLM(model=request.Model, quantization=request.Quantization) - else: - self.llm = LLM(model=request.Model) + self.llm = AsyncLLMEngine.from_engine_args(engine_args) except Exception as err: return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") return backend_pb2.Result(message="Model loaded successfully", success=True) - def Predict(self, request, context): + async def Predict(self, request, context): """ Generates text based on the given prompt and sampling parameters. @@ -99,24 +106,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): Returns: backend_pb2.Reply: The predict result. """ - if request.TopP == 0: - request.TopP = 0.9 + gen = self._predict(request, context, streaming=False) + res = await gen.__anext__() + return res - max_tokens = 200 - if request.Tokens > 0: - max_tokens = request.Tokens - - sampling_params = SamplingParams(max_tokens=max_tokens, temperature=request.Temperature, top_p=request.TopP) - outputs = self.llm.generate([request.Prompt], sampling_params) - - generated_text = outputs[0].outputs[0].text - # Remove prompt from response if present - if request.Prompt in generated_text: - generated_text = generated_text.replace(request.Prompt, "") - - return backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8')) - - def PredictStream(self, request, context): + async def PredictStream(self, request, context): """ Generates text based on the given prompt and sampling parameters, and streams the results. @@ -127,30 +121,84 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): Returns: backend_pb2.Result: The predict stream result. 
""" - yield self.Predict(request, context) + iterations = self._predict(request, context, streaming=True) + try: + async for iteration in iterations: + yield iteration + finally: + await iterations.aclose() -def serve(address): - server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) + async def _predict(self, request, context, streaming=False): + + # Build sampling parameters + sampling_params = SamplingParams(top_p=0.9, max_tokens=200) + if request.TopP != 0: + sampling_params.top_p = request.TopP + if request.Tokens > 0: + sampling_params.max_tokens = request.Tokens + if request.Temperature != 0: + sampling_params.temperature = request.Temperature + if request.TopK != 0: + sampling_params.top_k = request.TopK + if request.PresencePenalty != 0: + sampling_params.presence_penalty = request.PresencePenalty + if request.FrequencyPenalty != 0: + sampling_params.frequency_penalty = request.FrequencyPenalty + if request.StopPrompts: + sampling_params.stop = request.StopPrompts + if request.IgnoreEOS: + sampling_params.ignore_eos = request.IgnoreEOS + if request.Seed != 0: + sampling_params.seed = request.Seed + + # Generate text + request_id = random_uuid() + outputs = self.llm.generate(request.Prompt, sampling_params, request_id) + + # Stream the results + generated_text = "" + try: + async for request_output in outputs: + iteration_text = request_output.outputs[0].text + + if streaming: + # Remove text already sent as vllm concatenates the text from previous yields + delta_iteration_text = iteration_text.removeprefix(generated_text) + # Send the partial result + yield backend_pb2.Reply(message=bytes(delta_iteration_text, encoding='utf-8')) + + # Keep track of text generated + generated_text = iteration_text + finally: + await outputs.aclose() + + # If streaming, we already sent everything + if streaming: + return + + # Sending the final generated text + yield backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8')) + +async def serve(address): + # Start asyncio gRPC server + server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) + # Add the servicer to the server backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) + # Bind the server to the address server.add_insecure_port(address) - server.start() + + # Gracefully shutdown the server on SIGTERM or SIGINT + loop = asyncio.get_event_loop() + for sig in (signal.SIGINT, signal.SIGTERM): + loop.add_signal_handler( + sig, lambda: asyncio.ensure_future(server.stop(5)) + ) + + # Start the server + await server.start() print("Server started. Listening on: " + address, file=sys.stderr) - - # Define the signal handler function - def signal_handler(sig, frame): - print("Received termination signal. 
Shutting down...") - server.stop(0) - sys.exit(0) - - # Set the signal handlers for SIGINT and SIGTERM - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) - except KeyboardInterrupt: - server.stop(0) + # Wait for the server to be terminated + await server.wait_for_termination() if __name__ == "__main__": parser = argparse.ArgumentParser(description="Run the gRPC server.") @@ -159,4 +207,4 @@ if __name__ == "__main__": ) args = parser.parse_args() - serve(args.addr) + asyncio.run(serve(args.addr)) \ No newline at end of file From aa098e4d0ba3a05194b5f26ea294933ec0434c5e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 24 Feb 2024 11:51:59 +0100 Subject: [PATCH 0086/2895] fix(sse): do not omit empty finish_reason (#1745) Fixes https://github.com/mudler/LocalAI/issues/1744 --- core/schema/openai.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/schema/openai.go b/core/schema/openai.go index 23abd7b7..53dd5324 100644 --- a/core/schema/openai.go +++ b/core/schema/openai.go @@ -49,7 +49,7 @@ type OpenAIResponse struct { type Choice struct { Index int `json:"index"` - FinishReason string `json:"finish_reason,omitempty"` + FinishReason string `json:"finish_reason"` Message *Message `json:"message,omitempty"` Delta *Message `json:"delta,omitempty"` Text string `json:"text,omitempty"` From 71771d1e9b745691647ddafe3c71225fbeed3bb6 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 25 Feb 2024 10:02:52 +0100 Subject: [PATCH 0087/2895] :arrow_up: Update docs version mudler/LocalAI (#1752) Signed-off-by: GitHub Co-authored-by: mudler --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index 890f6c35..59fd693c 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.8.2" + "version": "v2.9.0" } From 8e45d47740a5d20d4460377a596638e24b58ad66 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 25 Feb 2024 10:03:19 +0100 Subject: [PATCH 0088/2895] :arrow_up: Update ggerganov/llama.cpp (#1753) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 71ca6fcf..d2c5e351 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=fd43d66f46ee3b5345fb8a74a252d86ccd34a409 +CPPLLAMA_VERSION?=9e359a4f47c1b2dceb99e29706c9f7403d32ab5e # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 7f72a6110488839558b1dc7994aee6948d44e00b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Serta=C3=A7=20=C3=96zercan?= <852750+sozercan@users.noreply.github.com> Date: Sun, 25 Feb 2024 15:06:18 -0800 Subject: [PATCH 0089/2895] ci: add stablediffusion to release (#1757) Signed-off-by: Sertac Ozercan --- .github/workflows/release.yaml | 29 +++++++++++++++++++++++++++++ Makefile | 5 +++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 82745225..aa0a270b 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -89,6 +89,35 @@ jobs: files: | release/* + build-stablediffusion: + runs-on: ubuntu-latest + steps: + - name: 
Clone + uses: actions/checkout@v4 + with: + submodules: true + - uses: actions/setup-go@v4 + with: + go-version: '>=1.21.0' + - name: Dependencies + run: | + sudo apt-get install -y --no-install-recommends libopencv-dev + sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 + - name: Build stablediffusion + run: | + make backend-assets/grpc/stablediffusion + mkdir -p release && cp backend-assets/grpc/stablediffusion release + - uses: actions/upload-artifact@v3 + with: + name: stablediffusion + path: release/ + - name: Release + uses: softprops/action-gh-release@v1 + if: startsWith(github.ref, 'refs/tags/') + with: + files: | + release/* + build-macOS: strategy: matrix: diff --git a/Makefile b/Makefile index d2c5e351..49dec19c 100644 --- a/Makefile +++ b/Makefile @@ -250,7 +250,7 @@ sources/go-piper/libpiper_binding.a: sources/go-piper $(MAKE) -C sources/go-piper libpiper_binding.a example/main backend/cpp/llama/llama.cpp: - LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp + LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp get-sources: backend/cpp/llama/llama.cpp sources/go-llama sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream touch $@ @@ -482,7 +482,7 @@ ifdef BUILD_GRPC_FOR_BACKEND_LLAMA CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server else echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined." - LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server + LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server endif ## BACKEND CPP LLAMA END @@ -516,6 +516,7 @@ backend-assets/grpc/langchain-huggingface: backend-assets/grpc backend-assets/grpc/stablediffusion: backend-assets/grpc if [ ! -f backend-assets/grpc/stablediffusion ]; then \ + $(MAKE) sources/go-stable-diffusion; \ $(MAKE) sources/go-stable-diffusion/libstablediffusion.a; \ CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-stable-diffusion/ LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion; \ From 05818e0425f891f40ca7e9215916ade2b615d660 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 26 Feb 2024 08:38:23 +0100 Subject: [PATCH 0090/2895] fix(functions): handle correctly when there are no results (#1758) --- api/openai/chat.go | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/api/openai/chat.go b/api/openai/chat.go index 78d02f96..cd535f0a 100644 --- a/api/openai/chat.go +++ b/api/openai/chat.go @@ -565,10 +565,20 @@ func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults log.Debug().Msgf("Function return: %s %+v", s, ss) for _, s := range ss { - func_name := s["function"] - args := s["arguments"] + func_name, ok := s["function"] + if !ok { + continue + } + args, ok := s["arguments"] + if !ok { + continue + } d, _ := json.Marshal(args) - results = append(results, funcCallResults{name: func_name.(string), arguments: string(d)}) + funcName, ok := func_name.(string) + if !ok { + continue + } + results = append(results, funcCallResults{name: funcName, arguments: string(d)}) } } else { // As we have to change the result before processing, we can't stream the answer token-by-token (yet?) 
@@ -579,12 +589,21 @@ func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults log.Debug().Msgf("Function return: %s %+v", s, ss) // The grammar defines the function name as "function", while OpenAI returns "name" - func_name := ss["function"] + func_name, ok := ss["function"] + if !ok { + return results + } // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object - args := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix) + args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix) + if !ok { + return results + } d, _ := json.Marshal(args) - - results = append(results, funcCallResults{name: func_name.(string), arguments: string(d)}) + funcName, ok := func_name.(string) + if !ok { + return results + } + results = append(results, funcCallResults{name: funcName, arguments: string(d)}) } return results From bc5f5aa538a48379cfe917ae6e1b928a9eb62c51 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 26 Feb 2024 13:18:44 +0100 Subject: [PATCH 0091/2895] deps(llama.cpp): update (#1759) Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- backend/cpp/llama/grpc-server.cpp | 225 ++++++++++++++++++++++++------ 2 files changed, 182 insertions(+), 45 deletions(-) diff --git a/Makefile b/Makefile index 49dec19c..6db75171 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=9e359a4f47c1b2dceb99e29706c9f7403d32ab5e +CPPLLAMA_VERSION?=c39373398803c669056304090050fe3f44b41bf9 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 0066c16d..c91ce854 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -58,9 +58,11 @@ struct server_params int32_t read_timeout = 600; int32_t write_timeout = 600; bool slots_endpoint = true; + bool metrics_endpoint = false; }; bool server_verbose = false; +bool server_log_json = true; static size_t common_part(const std::vector &a, const std::vector &b) { @@ -316,12 +318,76 @@ struct llama_client_slot } void print_timings() const { - LOG_TEE("\n"); - LOG_TEE("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n", - __func__, t_prompt_processing, num_prompt_tokens_processed, t_prompt_processing / num_prompt_tokens_processed, 1e3 / t_prompt_processing * num_prompt_tokens_processed); - LOG_TEE("%s: eval time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n", - __func__, t_token_generation, n_decoded,t_token_generation / n_decoded, 1e3 / t_token_generation * n_decoded); - LOG_TEE("%s: total time = %10.2f ms\n", __func__, t_prompt_processing + t_token_generation); + char buffer[512]; + double t_token = t_prompt_processing / num_prompt_tokens_processed; + double n_tokens_second = 1e3 / t_prompt_processing * num_prompt_tokens_processed; + sprintf(buffer, "prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)", + t_prompt_processing, num_prompt_tokens_processed, + t_token, n_tokens_second); + LOG_INFO(buffer, { + {"slot_id", id}, + {"task_id", task_id}, + {"t_prompt_processing", t_prompt_processing}, + {"num_prompt_tokens_processed", num_prompt_tokens_processed}, + {"t_token", t_token}, + {"n_tokens_second", 
n_tokens_second}, + }); + + t_token = t_token_generation / n_decoded; + n_tokens_second = 1e3 / t_token_generation * n_decoded; + sprintf(buffer, "generation eval time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)", + t_token_generation, n_decoded, + t_token, n_tokens_second); + LOG_INFO(buffer, { + {"slot_id", id}, + {"task_id", task_id}, + {"t_token_generation", t_token_generation}, + {"n_decoded", n_decoded}, + {"t_token", t_token}, + {"n_tokens_second", n_tokens_second}, + }); + + sprintf(buffer, " total time = %10.2f ms", t_prompt_processing + t_token_generation); + LOG_INFO(buffer, { + {"slot_id", id}, + {"task_id", task_id}, + {"t_prompt_processing", t_prompt_processing}, + {"t_token_generation", t_token_generation}, + {"t_total", t_prompt_processing + t_token_generation}, + }); + } +}; + +struct llama_metrics { + uint64_t n_prompt_tokens_processed_total = 0; + uint64_t n_tokens_predicted_total = 0; + + uint64_t n_prompt_tokens_processed = 0; + uint64_t t_prompt_processing = 0; + + uint64_t n_tokens_predicted = 0; + uint64_t t_tokens_generation = 0; + + + void on_prompt_eval(const llama_client_slot &slot) { + n_prompt_tokens_processed_total += slot.num_prompt_tokens_processed; + + n_prompt_tokens_processed += slot.num_prompt_tokens_processed; + t_prompt_processing += slot.t_prompt_processing; + } + + void on_prediction(const llama_client_slot &slot) { + n_tokens_predicted_total += slot.n_decoded; + + n_tokens_predicted += slot.n_decoded; + t_tokens_generation += slot.t_token_generation; + } + + void reset_bucket() { + n_prompt_tokens_processed = 0; + t_prompt_processing = 0; + n_tokens_predicted = 0; + t_tokens_generation = 0; } }; @@ -359,6 +425,8 @@ struct llama_server_context llama_server_queue queue_tasks; llama_server_response queue_results; + llama_metrics metrics; + ~llama_server_context() { if (ctx) @@ -378,7 +446,7 @@ struct llama_server_context params = params_; if (!params.mmproj.empty()) { multimodal = true; - LOG_TEE("Multi Modal Mode Enabled"); + LOG_INFO("Multi Modal Mode Enabled", {}); clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1); if(clp_ctx == nullptr) { LOG_ERROR("unable to load clip model", {{"model", params.mmproj}}); @@ -415,13 +483,23 @@ struct llama_server_context return true; } + void validate_model_chat_template(server_params & sparams) { + llama_chat_message chat[] = {{"user", "test"}}; + std::vector buf(1); + int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size()); + if (res < 0) { + LOG_ERROR("The chat template comes with this model is not yet supported, falling back to chatml. 
This may cause the model to output suboptimal responses", {}); + sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template + } + } + void initialize() { // create slots all_slots_are_idle = true; const int32_t n_ctx_slot = n_ctx / params.n_parallel; - LOG_TEE("Available slots:\n"); + LOG_INFO("initializing slots", {{"n_slots", params.n_parallel}}); for (int i = 0; i < params.n_parallel; i++) { llama_client_slot slot; @@ -430,7 +508,10 @@ struct llama_server_context slot.n_ctx = n_ctx_slot; slot.n_predict = params.n_predict; - LOG_TEE(" -> Slot %i - max context: %i\n", slot.id, n_ctx_slot); + LOG_INFO("new slot", { + {"slot_id", slot.id}, + {"n_ctx_slot", slot.n_ctx} + }); const int ga_n = params.grp_attn_n; const int ga_w = params.grp_attn_w; @@ -440,7 +521,12 @@ struct llama_server_context GGML_ASSERT(ga_w % ga_n == 0 && "ga_w must be a multiple of ga_n"); // NOLINT //GGML_ASSERT(n_ctx_train % ga_w == 0 && "n_ctx_train must be a multiple of ga_w"); // NOLINT //GGML_ASSERT(n_ctx >= n_ctx_train * ga_n && "n_ctx must be at least n_ctx_train * ga_n"); // NOLINT - LOG_TEE(" -> Slot %i - self-extend: ga_n = %d, ga_w = %d\n", slot.id, ga_n, ga_w); + + LOG_INFO("slot self-extend", { + {"slot_id", slot.id}, + {"ga_n", ga_n}, + {"ga_w", ga_w} + }); } slot.ga_i = 0; @@ -726,10 +812,16 @@ struct llama_server_context img_sl.img_data = clip_image_u8_init(); if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data)) { - LOG_TEE("slot %i - failed to load image [id: %i]\n", slot->id, img_sl.id); + LOG_ERROR("failed to load image", { + {"slot_id", slot->id}, + {"img_sl_id", img_sl.id} + }); return false; } - LOG_TEE("slot %i - loaded image\n", slot->id); + LOG_VERBOSE("image loaded", { + {"slot_id", slot->id}, + {"img_sl_id", img_sl.id} + }); img_sl.request_encode_image = true; slot->images.push_back(img_sl); } @@ -789,7 +881,10 @@ struct llama_server_context all_slots_are_idle = false; - LOG_TEE("slot %i is processing [task id: %i]\n", slot->id, slot->task_id); + LOG_INFO("slot is processing task", { + {"slot_id", slot->id}, + {"task_id", slot->task_id}, + }); return true; } @@ -814,10 +909,24 @@ struct llama_server_context llama_batch_add(batch, system_tokens[i], i, { 0 }, false); } - if (llama_decode(ctx, batch) != 0) + for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += params.n_batch) { - LOG_TEE("%s: llama_decode() failed\n", __func__); - return; + const int32_t n_tokens = std::min(params.n_batch, (int32_t) (batch.n_tokens - i)); + llama_batch batch_view = { + n_tokens, + batch.token + i, + nullptr, + batch.pos + i, + batch.n_seq_id + i, + batch.seq_id + i, + batch.logits + i, + 0, 0, 0, // unused + }; + if (llama_decode(ctx, batch_view) != 0) + { + LOG_TEE("%s: llama_decode() failed\n", __func__); + return; + } } // assign the system KV cache to all parallel sequences @@ -1351,7 +1460,7 @@ struct llama_server_context if (slot == nullptr) { // if no slot is available, we defer this task for processing later - LOG_VERBOSE("no slot is available", {}); + LOG_VERBOSE("no slot is available", {{"task_id", task.id}}); queue_tasks.defer(task); break; } @@ -1425,7 +1534,7 @@ struct llama_server_context bool update_slots() { if (system_need_update) { - LOG_TEE("updating system prompt\n"); + LOG_INFO("updating system prompt", {}); update_system_prompt(); } @@ -1435,12 +1544,13 @@ struct llama_server_context { if (system_prompt.empty() && clean_kv_cache) { - LOG_TEE("all slots are idle and system prompt is empty, 
clear the KV cache\n"); + LOG_INFO("all slots are idle and system prompt is empty, clear the KV cache", {}); kv_cache_clear(); } return true; } + LOG_VERBOSE("posting NEXT_RESPONSE", {}); task_server task; task.type = TASK_TYPE_NEXT_RESPONSE; task.target_id = -1; @@ -1471,6 +1581,7 @@ struct llama_server_context } // decode any currently ongoing sequences + LOG_VERBOSE("decoding ongoing sequences", {}); for (auto & slot : slots) { // release the slot @@ -1480,7 +1591,15 @@ struct llama_server_context slot.command = NONE; slot.t_last_used = ggml_time_us(); - LOG_TEE("slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size()); + LOG_INFO("slot released", { + {"slot_id", slot.id}, + {"task_id", slot.task_id}, + {"n_ctx", n_ctx}, + {"n_past", slot.n_past}, + {"n_system_tokens", system_tokens.size()}, + {"n_cache_tokens", slot.cache_tokens.size()}, + {"truncated", slot.truncated} + }); queue_tasks.notify_slot_changed(); continue; @@ -1607,6 +1726,14 @@ struct llama_server_context } slot.n_past = common_part(slot.cache_tokens, prompt_tokens); + + // the last token of the cache is not in the KV cache until the next call to llama_decode + // (it was sampled, pushed into the "cache_tokens", but not yet put in the context) + if (slot.n_past > 0 && slot.n_past == (int32_t) slot.cache_tokens.size()) + { + slot.n_past -= 1; + } + slot.num_prompt_tokens_processed = slot.num_prompt_tokens - slot.n_past; if (slot.ga_n != 1) @@ -1628,7 +1755,12 @@ struct llama_server_context slot.ga_i = ga_i; } - LOG_TEE("slot %d : in cache: %i tokens | to process: %i tokens\n", slot.id, slot.n_past, slot.num_prompt_tokens_processed); + LOG_INFO("slot progression", { + { "slot_id", slot.id }, + { "task_id", slot.task_id }, + { "n_past", slot.n_past }, + { "num_prompt_tokens_processed", slot.num_prompt_tokens_processed } + }); } slot.cache_tokens = prompt_tokens; @@ -1636,7 +1768,10 @@ struct llama_server_context if (slot.n_past == slot.num_prompt_tokens && slot.n_past > 0) { // we have to evaluate at least 1 token to generate logits. 
- LOG_TEE("slot %d : we have to evaluate at least 1 token to generate logits\n", slot.id); + LOG_INFO("we have to evaluate at least 1 token to generate logits", { + { "slot_id", slot.id }, + { "task_id", slot.task_id } + }); slot.n_past--; if (slot.ga_i > 0) { @@ -1644,9 +1779,13 @@ struct llama_server_context } } - LOG_TEE("slot %d : kv cache rm - [%d, end)\n", slot.id, (int) system_tokens.size() + slot.n_past); - - llama_kv_cache_seq_rm(ctx, slot.id, system_tokens.size() + slot.n_past, -1); + int p0 = (int) system_tokens.size() + slot.n_past; + LOG_INFO("kv cache rm [p0, end)", { + { "slot_id", slot.id }, + { "task_id", slot.task_id }, + { "p0", p0 } + }); + llama_kv_cache_seq_rm(ctx, slot.id, p0, -1); LOG_VERBOSE("prompt ingested", { {"n_past", slot.n_past}, @@ -1681,7 +1820,13 @@ struct llama_server_context if (has_images && !ingest_images(slot, n_batch)) { - LOG_TEE("failed processing images\n"); + LOG_ERROR("failed processing images", { + "slot_id", slot.id, + "task_id", slot.task_id, + }); + // FIXME @phymbert: to be properly tested + // early returning without changing the slot state will block the slot for ever + // no one at the moment is checking the return value return false; } @@ -1723,9 +1868,9 @@ struct llama_server_context LOG_TEE("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n); LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd); - llama_kv_cache_seq_shift(ctx, slot.id, slot.ga_i, slot.n_past_se, ib * bd); + llama_kv_cache_seq_add(ctx, slot.id, slot.ga_i, slot.n_past_se, ib * bd); llama_kv_cache_seq_div(ctx, slot.id, slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w,slot.ga_n); - llama_kv_cache_seq_shift(ctx, slot.id, slot.ga_i + ib * bd + slot.ga_w,slot.n_past_se + ib * bd, dd); + llama_kv_cache_seq_add(ctx, slot.id, slot.ga_i + ib * bd + slot.ga_w,slot.n_past_se + ib * bd, dd); slot.n_past_se -= bd; @@ -1781,7 +1926,7 @@ struct llama_server_context send_embedding(slot); slot.release(); slot.i_batch = -1; - return true; + continue; } completion_token_output result; @@ -1794,6 +1939,7 @@ struct llama_server_context { slot.t_start_genereration = ggml_time_us(); slot.t_prompt_processing = (slot.t_start_genereration - slot.t_start_process_prompt) / 1e3; + metrics.on_prompt_eval(slot); } llama_token_data_array cur_p = { slot.ctx_sampling->cur.data(), slot.ctx_sampling->cur.size(), false }; @@ -1816,11 +1962,14 @@ struct llama_server_context slot.release(); slot.print_timings(); send_final_response(slot); + metrics.on_prediction(slot); } slot.i_batch = -1; } } + + LOG_VERBOSE("slots updated", {}); return true; } @@ -1849,18 +1998,6 @@ static json format_partial_response( return res; } -static json format_tokenizer_response(const std::vector &tokens) -{ - return json{ - {"tokens", tokens}}; -} - -static json format_detokenized_response(std::string content) -{ - return json{ - {"content", content}}; -} - struct token_translator { llama_context * ctx; @@ -2119,9 +2256,9 @@ static void params_parse(const backend::ModelOptions* request, params.use_mmap = request->mmap(); params.embedding = request->embeddings(); - if (request->ropescaling() == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_NONE; } - else if (request->ropescaling() == "yarn") { params.rope_scaling_type = LLAMA_ROPE_SCALING_YARN; } - else { 
params.rope_scaling_type = LLAMA_ROPE_SCALING_LINEAR; } + if (request->ropescaling() == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; } + else if (request->ropescaling() == "yarn") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN; } + else { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_LINEAR; } if ( request->yarnextfactor() != 0.0f ) { params.yarn_ext_factor = request->yarnextfactor(); } From dfe54639b18a8620ce775dc7b058ffc5355ea284 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 27 Feb 2024 10:37:56 +0100 Subject: [PATCH 0092/2895] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 2ae95d8c..9207d5c8 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,7 @@ Hot topics (looking for contributors): - Backends v2: https://github.com/mudler/LocalAI/issues/1126 - Improving UX v2: https://github.com/mudler/LocalAI/issues/1373 - Assistant API: https://github.com/mudler/LocalAI/issues/1273 +- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999 If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22 From d6cf82aba3da6dfd36a8a3858db24764d2eb3edf Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 27 Feb 2024 15:04:19 +0100 Subject: [PATCH 0093/2895] fix(tests): re-enable tests after code move (#1764) Signed-off-by: Ettore Di Giacinto --- Makefile | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 6db75171..3d821f07 100644 --- a/Makefile +++ b/Makefile @@ -155,6 +155,7 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC) GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC) +TEST_PATHS?=./api/... ./pkg/... ./core/... 
# If empty, then we build all ifeq ($(GRPC_BACKENDS),) @@ -328,7 +329,7 @@ test-models/testmodel: cp tests/models_fixtures/* test-models prepare-test: grpcs - cp -rf backend-assets api + cp -rf backend-assets core/http cp tests/models_fixtures/* test-models test: prepare test-models/testmodel grpcs @@ -336,7 +337,7 @@ test: prepare test-models/testmodel grpcs export GO_TAGS="tts stablediffusion" $(MAKE) prepare-test HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts 5 --fail-fast -v -r ./api ./pkg + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts 5 --fail-fast -v -r $(TEST_PATHS) $(MAKE) test-gpt4all $(MAKE) test-llama $(MAKE) test-llama-gguf @@ -365,23 +366,23 @@ teardown-e2e: test-gpt4all: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r ./api ./pkg + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r $(TEST_PATHS) test-llama: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r ./api ./pkg + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS) test-llama-gguf: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r ./api ./pkg + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r $(TEST_PATHS) test-tts: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts 1 -v -r ./api ./pkg + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts 1 -v -r $(TEST_PATHS) test-stablediffusion: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts 1 -v -r ./api ./pkg + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts 1 -v -r $(TEST_PATHS) test-container: docker build --target requirements -t local-ai-test-container . 
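The `--label-filter` expressions these targets pass to Ginkgo select specs by the labels declared in the test sources. A minimal sketch of how a spec opts into one of the filtered runs — the suite and spec names here are illustrative, not taken from the LocalAI test tree:

```go
package sample_test

import (
	"testing"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

func TestSample(t *testing.T) {
	RegisterFailHandler(Fail)
	RunSpecs(t, "sample suite")
}

// Runs only under `--label-filter="llama"`, and is excluded by the main
// test target's `--label-filter="!gpt4all && !llama && !llama-gguf"` pass.
var _ = Describe("llama backend", Label("llama"), func() {
	It("loads a model", func() {
		Expect(true).To(BeTrue())
	})
})
```

With that label in place, `make test-llama` picks the spec up, while the default `test` target skips it.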
From 3f09010227e16c12153a1c01dcf71963c63ca06a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 27 Feb 2024 15:43:15 +0100 Subject: [PATCH 0094/2895] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 9207d5c8..0d968efc 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,7 @@ Other: ## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social) +- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance) - [Create a slackbot for teams and OSS projects that answer to documentation](https://mudler.pm/posts/smart-slackbot-for-teams/) - [LocalAI meets k8sgpt](https://www.youtube.com/watch?v=PKrDNuJ_dfE) - [Question Answering on Documents locally with LangChain, LocalAI, Chroma, and GPT4All](https://mudler.pm/posts/localai-question-answering/) From 3868ac840258220f970a08dac0e513377c1bd0ed Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 27 Feb 2024 15:44:15 +0100 Subject: [PATCH 0095/2895] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 0d968efc..8fc9d220 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,7 @@ Other: ## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social) +- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/ai/answers/tiZMDoZzZV6TLxgDXNBnFE/deploying-helm-charts-on-aws-eks) - [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance) - [Create a slackbot for teams and OSS projects that answer to documentation](https://mudler.pm/posts/smart-slackbot-for-teams/) - [LocalAI meets k8sgpt](https://www.youtube.com/watch?v=PKrDNuJ_dfE) From 9112cf153e7028f02800a8729c1c93f883e759c5 Mon Sep 17 00:00:00 2001 From: Joshua Waring Date: Tue, 27 Feb 2024 17:35:59 +0100 Subject: [PATCH 0096/2895] Update integrations.md (#1765) Added Jetbrains compatible plugin for LocalAI Signed-off-by: Joshua Waring --- docs/content/docs/integrations.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/content/docs/integrations.md b/docs/content/docs/integrations.md index 28e71bd5..36cfec2d 100644 --- a/docs/content/docs/integrations.md +++ b/docs/content/docs/integrations.md @@ -14,6 +14,7 @@ The list below is a list of software that integrates with LocalAI. - [AnythingLLM](https://github.com/Mintplex-Labs/anything-llm) - [Logseq GPT3 OpenAI plugin](https://github.com/briansunter/logseq-plugin-gpt3-openai) allows to set a base URL, and works with LocalAI. 
+- [CodeGPT](https://plugins.jetbrains.com/plugin/21056-codegpt) allows custom OpenAI-compatible endpoints since 2.4.0
 - https://github.com/longy2k/obsidian-bmo-chatbot
 - https://github.com/FlowiseAI/Flowise
 - https://github.com/k8sgpt-ai/k8sgpt

From d21a6b33ab547544d6658dde52df3b18169005a7 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 27 Feb 2024 19:07:51 +0100
Subject: [PATCH 0097/2895] :arrow_up: Update ggerganov/llama.cpp (#1756)

Signed-off-by: GitHub
Co-authored-by: mudler
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 3d821f07..075324d7 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0
 
 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
 
-CPPLLAMA_VERSION?=c39373398803c669056304090050fe3f44b41bf9
+CPPLLAMA_VERSION?=a33e6a0d2a66104ea9a906bdbf8a94d050189d91
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

From c7e08813a5691feda812f7dcfccf6165ff1c52b0 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 27 Feb 2024 23:12:51 +0100
Subject: [PATCH 0098/2895] :arrow_up: Update ggerganov/llama.cpp (#1767)

Signed-off-by: GitHub
Co-authored-by: mudler
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 075324d7..c8df0948 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0
 
 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
 
-CPPLLAMA_VERSION?=a33e6a0d2a66104ea9a906bdbf8a94d050189d91
+CPPLLAMA_VERSION?=cb49e0f8c906e5da49e9f6d64a57742a9a241c6a
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

From c8be839481222cf2ebedb316b804c705d6b012e9 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 27 Feb 2024 23:24:46 +0100
Subject: [PATCH 0099/2895] Update openai-functions.md

Signed-off-by: Ettore Di Giacinto
---
 docs/content/docs/features/openai-functions.md | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/docs/content/docs/features/openai-functions.md b/docs/content/docs/features/openai-functions.md
index 3f47898f..bd7c5a7d 100644
--- a/docs/content/docs/features/openai-functions.md
+++ b/docs/content/docs/features/openai-functions.md
@@ -1,16 +1,18 @@
 +++
 disableToc = false
-title = "🔥 OpenAI functions"
+title = "🔥 OpenAI functions and tools"
 weight = 17
 url = "/features/openai-functions/"
 +++
 
-LocalAI supports running OpenAI functions with `llama.cpp` compatible models.
+LocalAI supports running the OpenAI [functions and tools API](https://platform.openai.com/docs/api-reference/chat/create#chat-create-tools) with `llama.cpp` compatible models.
 
 ![localai-functions-1](https://github.com/ggerganov/llama.cpp/assets/2420543/5bd15da2-78c1-4625-be90-1e938e6823f1)
 
-To learn more about OpenAI functions, see the [OpenAI API blog post](https://openai.com/blog/function-calling-and-other-api-updates).
+To learn more about OpenAI functions, see also the [OpenAI API blog post](https://openai.com/blog/function-calling-and-other-api-updates).
+
+LocalAI also supports [JSON mode](https://platform.openai.com/docs/guides/text-generation/json-mode) out of the box with llama.cpp-compatible models.
 
 💡 Check out also [LocalAGI](https://github.com/mudler/LocalAGI) for an example on how to use LocalAI functions.
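The tools API documented above can be exercised with a plain HTTP request against an OpenAI-compatible LocalAI endpoint. A hedged sketch in Go — the base URL, model name, and function schema are placeholders for this example, not values taken from the docs:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Request body follows the OpenAI chat-completions tools schema;
	// the model name and endpoint address are assumptions for this sketch.
	body := map[string]interface{}{
		"model": "gpt-3.5-turbo",
		"messages": []map[string]string{
			{"role": "user", "content": "What is the weather like in Boston?"},
		},
		"tools": []map[string]interface{}{
			{
				"type": "function",
				"function": map[string]interface{}{
					"name":        "get_current_weather",
					"description": "Get the current weather in a given location",
					"parameters": map[string]interface{}{
						"type": "object",
						"properties": map[string]interface{}{
							"location": map[string]string{"type": "string"},
						},
						"required": []string{"location"},
					},
				},
			},
		},
	}
	payload, _ := json.Marshal(body)

	resp, err := http.Post("http://localhost:8080/v1/chat/completions",
		"application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// If the model decides to call the function, the response carries it
	// under choices[0].message.tool_calls, mirroring the OpenAI schema.
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out))
}
```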
From 6e95beccb9899d9636321855dec0c599cafe6d23 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 28 Feb 2024 15:24:08 +0100
Subject: [PATCH 0100/2895] Update overview.md

Signed-off-by: Ettore Di Giacinto
---
 docs/content/docs/overview.md | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md
index c1232969..3ac21e94 100644
--- a/docs/content/docs/overview.md
+++ b/docs/content/docs/overview.md
@@ -111,6 +111,3 @@ This is a community project, a special thanks to our contributors! 🤗
 
-
-
-

From be498c5dd9b673f1480fffd593741d04c3efdae4 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 28 Feb 2024 15:58:31 +0100
Subject: [PATCH 0101/2895] Update openai-functions.md

Signed-off-by: Ettore Di Giacinto
---
 .../content/docs/features/openai-functions.md | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/docs/content/docs/features/openai-functions.md b/docs/content/docs/features/openai-functions.md
index bd7c5a7d..843524f4 100644
--- a/docs/content/docs/features/openai-functions.md
+++ b/docs/content/docs/features/openai-functions.md
@@ -80,6 +80,26 @@ When running the python script, be sure to:
 
 ## Advanced
 
+### Parallel tool calls
+
+This feature is experimental and must be enabled in the model's YAML configuration by setting `function.parallel_calls`:
+
+```yaml
+name: gpt-3.5-turbo
+parameters:
+  # Model file name
+  model: ggml-openllama.bin
+  top_p: 0.9
+  top_k: 80
+  temperature: 0.1
+
+function:
+  # set to true to allow the model to call multiple functions in parallel
+  parallel_calls: true
+```
+
+### Use functions with grammar
+
 It is possible to also specify the full function signature (for debugging, or to use with other clients).
 
 The chat endpoint accepts the `grammar_json_functions` additional parameter which takes a JSON schema object.

From ba672b51dabd91259dc56301ce8b1d7c93964f36 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 28 Feb 2024 16:03:38 +0100
Subject: [PATCH 0102/2895] Update README.md

Signed-off-by: Ettore Di Giacinto
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8fc9d220..43c534ac 100644
--- a/README.md
+++ b/README.md
@@ -68,7 +68,7 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
 
 For a detailed step-by-step introduction, refer to the [Getting Started](https://localai.io/basics/getting_started/index.html) guide.
For those in a hurry, here's a straightforward one-liner to launch a LocalAI instance with [phi-2](https://huggingface.co/microsoft/phi-2) using `docker`: ``` -docker run -ti -p 8080:8080 localai/localai:v2.7.0-ffmpeg-core phi-2 +docker run -ti -p 8080:8080 localai/localai:v2.9.0-ffmpeg-core phi-2 ``` ## 🚀 [Features](https://localai.io/features/) From f651a660aa5508555a431ae722bed88ebb733f86 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 28 Feb 2024 23:02:30 +0100 Subject: [PATCH 0103/2895] :arrow_up: Update ggerganov/llama.cpp (#1772) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c8df0948..7cf49644 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=cb49e0f8c906e5da49e9f6d64a57742a9a241c6a +CPPLLAMA_VERSION?=87c91c07663b707e831c59ec373b5e665ff9d64a # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From c6658986525d1e56ef3c8bb349295428057e1946 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 29 Feb 2024 00:50:27 +0100 Subject: [PATCH 0104/2895] :arrow_up: Update donomii/go-rwkv.cpp (#1771) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7cf49644..37d1d0c0 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8 # go-rwkv version RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp -RWKV_VERSION?=633c5a3485c403cb2520693dc0991a25dace9f0f +RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version WHISPER_CPP_VERSION?=37a709f6558c6d9783199e2b8cbb136e1c41d346 From c1966af2cf1d9664aa858cf2c858a2f6d308a51e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 29 Feb 2024 11:40:08 +0100 Subject: [PATCH 0105/2895] ci: reduce stress on self-hosted runners (#1776) Split jobs by self-hosted and free public runner provided by Github Signed-off-by: Ettore Di Giacinto --- .github/workflows/image.yml | 95 +++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 46 deletions(-) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 830528a1..a9620baa 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -13,7 +13,7 @@ concurrency: cancel-in-progress: true jobs: - extras-image-build: + self-hosted-jobs: uses: ./.github/workflows/image_build.yml with: tag-latest: ${{ matrix.tag-latest }} @@ -37,6 +37,7 @@ jobs: max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }} matrix: include: + # Extra images - build-type: '' #platforms: 'linux/amd64,linux/arm64' platforms: 'linux/amd64' @@ -119,51 +120,7 @@ jobs: image-type: 'extras' base-image: "rocm/dev-ubuntu-22.04:6.0-complete" runs-on: 'arc-runner-set' - core-image-build: - uses: ./.github/workflows/image_build.yml - with: - tag-latest: ${{ matrix.tag-latest }} - tag-suffix: ${{ matrix.tag-suffix }} - ffmpeg: ${{ matrix.ffmpeg }} - image-type: ${{ matrix.image-type }} - build-type: ${{ matrix.build-type }} - cuda-major-version: ${{ matrix.cuda-major-version }} - cuda-minor-version: ${{ matrix.cuda-minor-version }} - platforms: ${{ matrix.platforms }} - runs-on: ${{ matrix.runs-on }} - base-image: ${{ matrix.base-image }} - secrets: - 
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} - dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} - quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} - quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} - strategy: - matrix: - include: - - build-type: 'hipblas' - platforms: 'linux/amd64' - tag-latest: 'false' - tag-suffix: '-hipblas-ffmpeg-core' - ffmpeg: 'true' - image-type: 'core' - base-image: "rocm/dev-ubuntu-22.04:6.0-complete" - runs-on: 'arc-runner-set' - - build-type: 'hipblas' - platforms: 'linux/amd64' - tag-latest: 'false' - tag-suffix: '-hipblas-core' - ffmpeg: 'false' - image-type: 'core' - base-image: "rocm/dev-ubuntu-22.04:6.0-complete" - runs-on: 'arc-runner-set' - - build-type: '' - platforms: 'linux/amd64' - tag-latest: 'false' - tag-suffix: '-ffmpeg-core' - ffmpeg: 'true' - image-type: 'core' - base-image: "ubuntu:22.04" - runs-on: 'ubuntu-latest' + # Core images - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' @@ -196,6 +153,52 @@ jobs: ffmpeg: 'true' image-type: 'core' runs-on: 'arc-runner-set' + - build-type: 'hipblas' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-hipblas-ffmpeg-core' + ffmpeg: 'true' + image-type: 'core' + base-image: "rocm/dev-ubuntu-22.04:6.0-complete" + runs-on: 'arc-runner-set' + - build-type: 'hipblas' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-hipblas-core' + ffmpeg: 'false' + image-type: 'core' + base-image: "rocm/dev-ubuntu-22.04:6.0-complete" + runs-on: 'arc-runner-set' + + core-image-build: + uses: ./.github/workflows/image_build.yml + with: + tag-latest: ${{ matrix.tag-latest }} + tag-suffix: ${{ matrix.tag-suffix }} + ffmpeg: ${{ matrix.ffmpeg }} + image-type: ${{ matrix.image-type }} + build-type: ${{ matrix.build-type }} + cuda-major-version: ${{ matrix.cuda-major-version }} + cuda-minor-version: ${{ matrix.cuda-minor-version }} + platforms: ${{ matrix.platforms }} + runs-on: ${{ matrix.runs-on }} + base-image: ${{ matrix.base-image }} + secrets: + dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} + dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} + quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} + quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} + strategy: + matrix: + include: + - build-type: '' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-ffmpeg-core' + ffmpeg: 'true' + image-type: 'core' + base-image: "ubuntu:22.04" + runs-on: 'ubuntu-latest' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" From 31a4c9c9d3abc58de2bdc5305419181c8b33eb1c Mon Sep 17 00:00:00 2001 From: Oussama <57842127+ouxs-19@users.noreply.github.com> Date: Thu, 29 Feb 2024 19:32:29 +0100 Subject: [PATCH 0106/2895] Fix Command Injection Vulnerability (#1778) * Added fix for command injection * changed function name from sh to runCommand --- backend/go/transcribe/transcript.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/backend/go/transcribe/transcript.go b/backend/go/transcribe/transcript.go index dc331cae..fdfaa974 100644 --- a/backend/go/transcribe/transcript.go +++ b/backend/go/transcribe/transcript.go @@ -11,21 +11,21 @@ import ( "github.com/go-skynet/LocalAI/core/schema" ) -func sh(c string) (string, error) { - cmd := exec.Command("/bin/sh", "-c", c) +func runCommand(command []string) (string, error) { + cmd := exec.Command(command[0], command[1:]...) 
cmd.Env = os.Environ() - o, err := cmd.CombinedOutput() - return string(o), err + out, err := cmd.CombinedOutput() + return string(out), err } -// AudioToWav converts audio to wav for transcribe. It bashes out to ffmpeg +// AudioToWav converts audio to wav for transcribe. // TODO: use https://github.com/mccoyst/ogg? func audioToWav(src, dst string) error { - out, err := sh(fmt.Sprintf("ffmpeg -i %s -format s16le -ar 16000 -ac 1 -acodec pcm_s16le %s", src, dst)) + command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst} + out, err := runCommand(command) if err != nil { return fmt.Errorf("error: %w out: %s", err, out) } - return nil } From 9068bc5271e583cc889feb2a29cd2dc9fa532a73 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 29 Feb 2024 19:53:04 +0100 Subject: [PATCH 0107/2895] Create SECURITY.md Signed-off-by: Ettore Di Giacinto --- SECURITY.md | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000..9c39f823 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,42 @@ +# Security Policy + +## Introduction + +At LocalAI, we take the security of our software seriously. We understand the importance of protecting our community from vulnerabilities and are committed to ensuring the safety and security of our users. + +## Supported Versions + +We provide support and updates for certain versions of our software. The following table outlines which versions are currently supported with security updates: + +| Version | Supported | +| ------- | ------------------ | +| > 2.0 | :white_check_mark: | +| < 2.0 | :x: | + +Please ensure that you are using a supported version to receive the latest security updates. + +## Reporting a Vulnerability + +We encourage the responsible disclosure of any security vulnerabilities. If you believe you've found a security issue in our software, we kindly ask you to follow the steps below to report it to us: + +1. **Email Us:** Send an email to [security@localai.io](mailto:security@localai.io) with a detailed report. Please do not disclose the vulnerability publicly or to any third parties before it has been addressed by us. + +2. **Expect a Response:** We aim to acknowledge receipt of vulnerability reports within 48 hours. Our security team will review your report and work closely with you to understand the impact and ensure a thorough investigation. + +3. **Collaboration:** If the vulnerability is accepted, we will work with you and our community to address the issue promptly. We'll keep you informed throughout the resolution process and may request additional information or collaboration. + +4. **Disclosure:** Once the vulnerability has been resolved, we encourage a coordinated disclosure. We believe in transparency and will work with you to ensure that our community is informed in a responsible manner. + +## Use of Third-Party Platforms + +As a Free and Open Source Software (FOSS) organization, we do not offer monetary bounties. However, researchers who wish to report vulnerabilities can also do so via [Huntr](https://huntr.dev/bounties), a platform that recognizes contributions to open source security. + +## Contact + +For any security-related inquiries beyond vulnerability reporting, please contact us at [security@localai.io](mailto:security@localai.io). + +## Acknowledgments + +We appreciate the efforts of those who contribute to the security of our project. 
Your responsible disclosure is invaluable to the safety and integrity of LocalAI. + +Thank you for helping us keep LocalAI secure. From 316de82f5189ec8ad1ba8b36f7bef99ab00c0700 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 29 Feb 2024 23:33:30 +0100 Subject: [PATCH 0108/2895] :arrow_up: Update ggerganov/llama.cpp (#1779) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 37d1d0c0..e9d3b2bc 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=87c91c07663b707e831c59ec373b5e665ff9d64a +CPPLLAMA_VERSION?=d5ab29757ebc59a30f03e408294ec20628a6374e # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 1c312685aa52333d6250f70cf03a2d4ee72c4509 Mon Sep 17 00:00:00 2001 From: Dave Date: Fri, 1 Mar 2024 10:19:53 -0500 Subject: [PATCH 0109/2895] refactor: move remaining api packages to core (#1731) * core 1 * api/openai/files fix * core 2 - core/config * move over core api.go and tests to the start of core/http * move over localai specific endpoints to core/http, begin the service/endpoint split there * refactor big chunk on the plane * refactor chunk 2 on plane, next step: port and modify changes to request.go * easy fixes for request.go, major changes not done yet * lintfix * json tag lintfix? * gitignore and .keep files * strange fix attempt: rename the config dir? --- .gitignore | 4 +- Makefile | 4 +- api/localai/backend_monitor.go | 162 --------- api/localai/gallery.go | 326 ------------------ configuration/.keep | 0 core/backend/embeddings.go | 30 +- core/backend/image.go | 48 +-- core/backend/llm.go | 8 +- core/backend/options.go | 16 +- core/backend/transcript.go | 19 +- core/backend/tts.go | 26 +- .../application_config.go} | 99 +++--- core/config/{config.go => backend_config.go} | 153 ++++---- core/config/config_test.go | 26 +- core/http/api.go | 229 ++++-------- core/http/api_test.go | 146 +++++--- {api => core/http}/ctx/fiber.go | 0 .../http/endpoints/localai/backend_monitor.go | 36 ++ core/http/endpoints/localai/gallery.go | 146 ++++++++ core/http/endpoints/localai/metrics.go | 43 +++ .../http/endpoints/localai/tts.go | 25 +- {api => core/http/endpoints}/openai/chat.go | 37 +- .../http/endpoints}/openai/completion.go | 24 +- {api => core/http/endpoints}/openai/edit.go | 16 +- .../http/endpoints}/openai/embeddings.go | 15 +- {api => core/http/endpoints}/openai/files.go | 28 +- .../http/endpoints}/openai/files_test.go | 24 +- {api => core/http/endpoints}/openai/image.go | 22 +- .../http/endpoints}/openai/inference.go | 8 +- {api => core/http/endpoints}/openai/list.go | 8 +- .../http/endpoints}/openai/request.go | 27 +- .../http/endpoints}/openai/transcription.go | 12 +- core/schema/localai.go | 21 ++ core/schema/openai.go | 8 +- core/{config => schema}/prediction.go | 2 +- core/services/backend_monitor.go | 140 ++++++++ core/services/gallery.go | 167 +++++++++ core/services/metrics.go | 54 +++ core/startup/config_file_watcher.go | 100 ++++++ core/startup/startup.go | 128 +++++++ .../backend monitor/backend monitor.bru | 8 +- .../langchainjs-localai-example/src/index.mts | 4 +- go.mod | 6 +- go.sum | 12 +- main.go | 121 ++++--- metrics/metrics.go | 83 ----- pkg/downloader/uri.go | 4 + pkg/gallery/models_test.go | 1 - pkg/gallery/op.go | 18 + tests/integration/reflect_test.go | 2 
+- 50 files changed, 1440 insertions(+), 1206 deletions(-) delete mode 100644 api/localai/backend_monitor.go delete mode 100644 api/localai/gallery.go create mode 100644 configuration/.keep rename core/{options/options.go => config/application_config.go} (69%) rename core/config/{config.go => backend_config.go} (77%) rename {api => core/http}/ctx/fiber.go (100%) create mode 100644 core/http/endpoints/localai/backend_monitor.go create mode 100644 core/http/endpoints/localai/gallery.go create mode 100644 core/http/endpoints/localai/metrics.go rename api/localai/localai.go => core/http/endpoints/localai/tts.go (56%) rename {api => core/http/endpoints}/openai/chat.go (90%) rename {api => core/http/endpoints}/openai/completion.go (82%) rename {api => core/http/endpoints}/openai/edit.go (77%) rename {api => core/http/endpoints}/openai/embeddings.go (73%) rename {api => core/http/endpoints}/openai/files.go (83%) rename {api => core/http/endpoints}/openai/files_test.go (92%) rename {api => core/http/endpoints}/openai/image.go (87%) rename {api => core/http/endpoints}/openai/inference.go (90%) rename {api => core/http/endpoints}/openai/list.go (87%) rename {api => core/http/endpoints}/openai/request.go (89%) rename {api => core/http/endpoints}/openai/transcription.go (71%) create mode 100644 core/schema/localai.go rename core/{config => schema}/prediction.go (99%) create mode 100644 core/services/backend_monitor.go create mode 100644 core/services/gallery.go create mode 100644 core/services/metrics.go create mode 100644 core/startup/config_file_watcher.go create mode 100644 core/startup/startup.go delete mode 100644 metrics/metrics.go create mode 100644 pkg/gallery/op.go diff --git a/.gitignore b/.gitignore index df00829c..b48f7391 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,7 @@ local-ai !charts/* # prevent above rules from omitting the api/localai folder !api/localai +!core/**/localai # Ignore models models/* @@ -34,6 +35,7 @@ release/ .idea # Generated during build -backend-assets/ +backend-assets/* +!backend-assets/.keep prepare /ggml-metal.metal diff --git a/Makefile b/Makefile index e9d3b2bc..a52774cd 100644 --- a/Makefile +++ b/Makefile @@ -44,6 +44,8 @@ BUILD_ID?=git TEST_DIR=/tmp/test +TEST_FLAKES?=5 + RANDOM := $(shell bash -c 'echo $$RANDOM') VERSION?=$(shell git describe --always --tags || echo "dev" ) @@ -337,7 +339,7 @@ test: prepare test-models/testmodel grpcs export GO_TAGS="tts stablediffusion" $(MAKE) prepare-test HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts 5 --fail-fast -v -r $(TEST_PATHS) + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS) $(MAKE) test-gpt4all $(MAKE) test-llama $(MAKE) test-llama-gguf diff --git a/api/localai/backend_monitor.go b/api/localai/backend_monitor.go deleted file mode 100644 index e6f1b409..00000000 --- a/api/localai/backend_monitor.go +++ /dev/null @@ -1,162 +0,0 @@ -package localai - -import ( - "context" - "fmt" - "strings" - - config "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - "github.com/go-skynet/LocalAI/core/options" - "github.com/gofiber/fiber/v2" - 
"github.com/rs/zerolog/log" - - gopsutil "github.com/shirou/gopsutil/v3/process" -) - -type BackendMonitorRequest struct { - Model string `json:"model" yaml:"model"` -} - -type BackendMonitorResponse struct { - MemoryInfo *gopsutil.MemoryInfoStat - MemoryPercent float32 - CPUPercent float64 -} - -type BackendMonitor struct { - configLoader *config.ConfigLoader - options *options.Option // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name. -} - -func NewBackendMonitor(configLoader *config.ConfigLoader, options *options.Option) BackendMonitor { - return BackendMonitor{ - configLoader: configLoader, - options: options, - } -} - -func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*BackendMonitorResponse, error) { - config, exists := bm.configLoader.GetConfig(model) - var backend string - if exists { - backend = config.Model - } else { - // Last ditch effort: use it raw, see if a backend happens to match. - backend = model - } - - if !strings.HasSuffix(backend, ".bin") { - backend = fmt.Sprintf("%s.bin", backend) - } - - pid, err := bm.options.Loader.GetGRPCPID(backend) - - if err != nil { - log.Error().Msgf("model %s : failed to find pid %+v", model, err) - return nil, err - } - - // Name is slightly frightening but this does _not_ create a new process, rather it looks up an existing process by PID. - backendProcess, err := gopsutil.NewProcess(int32(pid)) - - if err != nil { - log.Error().Msgf("model %s [PID %d] : error getting process info %+v", model, pid, err) - return nil, err - } - - memInfo, err := backendProcess.MemoryInfo() - - if err != nil { - log.Error().Msgf("model %s [PID %d] : error getting memory info %+v", model, pid, err) - return nil, err - } - - memPercent, err := backendProcess.MemoryPercent() - if err != nil { - log.Error().Msgf("model %s [PID %d] : error getting memory percent %+v", model, pid, err) - return nil, err - } - - cpuPercent, err := backendProcess.CPUPercent() - if err != nil { - log.Error().Msgf("model %s [PID %d] : error getting cpu percent %+v", model, pid, err) - return nil, err - } - - return &BackendMonitorResponse{ - MemoryInfo: memInfo, - MemoryPercent: memPercent, - CPUPercent: cpuPercent, - }, nil -} - -func (bm BackendMonitor) getModelLoaderIDFromCtx(c *fiber.Ctx) (string, error) { - input := new(BackendMonitorRequest) - // Get input data from the request body - if err := c.BodyParser(input); err != nil { - return "", err - } - - config, exists := bm.configLoader.GetConfig(input.Model) - var backendId string - if exists { - backendId = config.Model - } else { - // Last ditch effort: use it raw, see if a backend happens to match. 
- backendId = input.Model - } - - if !strings.HasSuffix(backendId, ".bin") { - backendId = fmt.Sprintf("%s.bin", backendId) - } - - return backendId, nil -} - -func BackendMonitorEndpoint(bm BackendMonitor) func(c *fiber.Ctx) error { - return func(c *fiber.Ctx) error { - - backendId, err := bm.getModelLoaderIDFromCtx(c) - if err != nil { - return err - } - - model := bm.options.Loader.CheckIsLoaded(backendId) - if model == "" { - return fmt.Errorf("backend %s is not currently loaded", backendId) - } - - status, rpcErr := model.GRPC(false, nil).Status(context.TODO()) - if rpcErr != nil { - log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error()) - val, slbErr := bm.SampleLocalBackendProcess(backendId) - if slbErr != nil { - return fmt.Errorf("backend %s experienced an error retrieving status info via rpc: %s, then failed local node process sample: %s", backendId, rpcErr.Error(), slbErr.Error()) - } - return c.JSON(proto.StatusResponse{ - State: proto.StatusResponse_ERROR, - Memory: &proto.MemoryUsageData{ - Total: val.MemoryInfo.VMS, - Breakdown: map[string]uint64{ - "gopsutil-RSS": val.MemoryInfo.RSS, - }, - }, - }) - } - - return c.JSON(status) - } -} - -func BackendShutdownEndpoint(bm BackendMonitor) func(c *fiber.Ctx) error { - return func(c *fiber.Ctx) error { - backendId, err := bm.getModelLoaderIDFromCtx(c) - if err != nil { - return err - } - - return bm.options.Loader.ShutdownModel(backendId) - } -} diff --git a/api/localai/gallery.go b/api/localai/gallery.go deleted file mode 100644 index ee6f4d7d..00000000 --- a/api/localai/gallery.go +++ /dev/null @@ -1,326 +0,0 @@ -package localai - -import ( - "context" - "fmt" - "os" - "slices" - "strings" - "sync" - - json "github.com/json-iterator/go" - "gopkg.in/yaml.v3" - - config "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/pkg/gallery" - "github.com/go-skynet/LocalAI/pkg/utils" - - "github.com/gofiber/fiber/v2" - "github.com/google/uuid" - "github.com/rs/zerolog/log" -) - -type galleryOp struct { - req gallery.GalleryModel - id string - galleries []gallery.Gallery - galleryName string -} - -type galleryOpStatus struct { - FileName string `json:"file_name"` - Error error `json:"error"` - Processed bool `json:"processed"` - Message string `json:"message"` - Progress float64 `json:"progress"` - TotalFileSize string `json:"file_size"` - DownloadedFileSize string `json:"downloaded_size"` -} - -type galleryApplier struct { - modelPath string - sync.Mutex - C chan galleryOp - statuses map[string]*galleryOpStatus -} - -func NewGalleryService(modelPath string) *galleryApplier { - return &galleryApplier{ - modelPath: modelPath, - C: make(chan galleryOp), - statuses: make(map[string]*galleryOpStatus), - } -} - -func prepareModel(modelPath string, req gallery.GalleryModel, cm *config.ConfigLoader, downloadStatus func(string, string, string, float64)) error { - - config, err := gallery.GetGalleryConfigFromURL(req.URL) - if err != nil { - return err - } - - config.Files = append(config.Files, req.AdditionalFiles...) 
- - return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus) -} - -func (g *galleryApplier) updateStatus(s string, op *galleryOpStatus) { - g.Lock() - defer g.Unlock() - g.statuses[s] = op -} - -func (g *galleryApplier) getStatus(s string) *galleryOpStatus { - g.Lock() - defer g.Unlock() - - return g.statuses[s] -} - -func (g *galleryApplier) getAllStatus() map[string]*galleryOpStatus { - g.Lock() - defer g.Unlock() - - return g.statuses -} - -func (g *galleryApplier) Start(c context.Context, cm *config.ConfigLoader) { - go func() { - for { - select { - case <-c.Done(): - return - case op := <-g.C: - utils.ResetDownloadTimers() - - g.updateStatus(op.id, &galleryOpStatus{Message: "processing", Progress: 0}) - - // updates the status with an error - updateError := func(e error) { - g.updateStatus(op.id, &galleryOpStatus{Error: e, Processed: true, Message: "error: " + e.Error()}) - } - - // displayDownload displays the download progress - progressCallback := func(fileName string, current string, total string, percentage float64) { - g.updateStatus(op.id, &galleryOpStatus{Message: "processing", FileName: fileName, Progress: percentage, TotalFileSize: total, DownloadedFileSize: current}) - utils.DisplayDownloadFunction(fileName, current, total, percentage) - } - - var err error - // if the request contains a gallery name, we apply the gallery from the gallery list - if op.galleryName != "" { - if strings.Contains(op.galleryName, "@") { - err = gallery.InstallModelFromGallery(op.galleries, op.galleryName, g.modelPath, op.req, progressCallback) - } else { - err = gallery.InstallModelFromGalleryByName(op.galleries, op.galleryName, g.modelPath, op.req, progressCallback) - } - } else { - err = prepareModel(g.modelPath, op.req, cm, progressCallback) - } - - if err != nil { - updateError(err) - continue - } - - // Reload models - err = cm.LoadConfigs(g.modelPath) - if err != nil { - updateError(err) - continue - } - - err = cm.Preload(g.modelPath) - if err != nil { - updateError(err) - continue - } - - g.updateStatus(op.id, &galleryOpStatus{Processed: true, Message: "completed", Progress: 100}) - } - } - }() -} - -type galleryModel struct { - gallery.GalleryModel `yaml:",inline"` // https://github.com/go-yaml/yaml/issues/63 - ID string `json:"id"` -} - -func processRequests(modelPath, s string, cm *config.ConfigLoader, galleries []gallery.Gallery, requests []galleryModel) error { - var err error - for _, r := range requests { - utils.ResetDownloadTimers() - if r.ID == "" { - err = prepareModel(modelPath, r.GalleryModel, cm, utils.DisplayDownloadFunction) - } else { - if strings.Contains(r.ID, "@") { - err = gallery.InstallModelFromGallery( - galleries, r.ID, modelPath, r.GalleryModel, utils.DisplayDownloadFunction) - } else { - err = gallery.InstallModelFromGalleryByName( - galleries, r.ID, modelPath, r.GalleryModel, utils.DisplayDownloadFunction) - } - } - } - return err -} - -func ApplyGalleryFromFile(modelPath, s string, cm *config.ConfigLoader, galleries []gallery.Gallery) error { - dat, err := os.ReadFile(s) - if err != nil { - return err - } - var requests []galleryModel - - if err := yaml.Unmarshal(dat, &requests); err != nil { - return err - } - - return processRequests(modelPath, s, cm, galleries, requests) -} - -func ApplyGalleryFromString(modelPath, s string, cm *config.ConfigLoader, galleries []gallery.Gallery) error { - var requests []galleryModel - err := json.Unmarshal([]byte(s), &requests) - if err != nil { - return err - } - - return 
processRequests(modelPath, s, cm, galleries, requests) -} - -/// Endpoint Service - -type ModelGalleryService struct { - galleries []gallery.Gallery - modelPath string - galleryApplier *galleryApplier -} - -type GalleryModel struct { - ID string `json:"id"` - gallery.GalleryModel -} - -func CreateModelGalleryService(galleries []gallery.Gallery, modelPath string, galleryApplier *galleryApplier) ModelGalleryService { - return ModelGalleryService{ - galleries: galleries, - modelPath: modelPath, - galleryApplier: galleryApplier, - } -} - -func (mgs *ModelGalleryService) GetOpStatusEndpoint() func(c *fiber.Ctx) error { - return func(c *fiber.Ctx) error { - status := mgs.galleryApplier.getStatus(c.Params("uuid")) - if status == nil { - return fmt.Errorf("could not find any status for ID") - } - return c.JSON(status) - } -} - -func (mgs *ModelGalleryService) GetAllStatusEndpoint() func(c *fiber.Ctx) error { - return func(c *fiber.Ctx) error { - return c.JSON(mgs.galleryApplier.getAllStatus()) - } -} - -func (mgs *ModelGalleryService) ApplyModelGalleryEndpoint() func(c *fiber.Ctx) error { - return func(c *fiber.Ctx) error { - input := new(GalleryModel) - // Get input data from the request body - if err := c.BodyParser(input); err != nil { - return err - } - - uuid, err := uuid.NewUUID() - if err != nil { - return err - } - mgs.galleryApplier.C <- galleryOp{ - req: input.GalleryModel, - id: uuid.String(), - galleryName: input.ID, - galleries: mgs.galleries, - } - return c.JSON(struct { - ID string `json:"uuid"` - StatusURL string `json:"status"` - }{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()}) - } -} - -func (mgs *ModelGalleryService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error { - return func(c *fiber.Ctx) error { - log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries) - - models, err := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath) - if err != nil { - return err - } - log.Debug().Msgf("Models found from galleries: %+v", models) - for _, m := range models { - log.Debug().Msgf("Model found from galleries: %+v", m) - } - dat, err := json.Marshal(models) - if err != nil { - return err - } - return c.Send(dat) - } -} - -// NOTE: This is different (and much simpler!) than above! This JUST lists the model galleries that have been loaded, not their contents! 
-func (mgs *ModelGalleryService) ListModelGalleriesEndpoint() func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		log.Debug().Msgf("Listing model galleries %+v", mgs.galleries)
-		dat, err := json.Marshal(mgs.galleries)
-		if err != nil {
-			return err
-		}
-		return c.Send(dat)
-	}
-}
-
-func (mgs *ModelGalleryService) AddModelGalleryEndpoint() func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		input := new(gallery.Gallery)
-		// Get input data from the request body
-		if err := c.BodyParser(input); err != nil {
-			return err
-		}
-		if slices.ContainsFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
-			return gallery.Name == input.Name
-		}) {
-			return fmt.Errorf("%s already exists", input.Name)
-		}
-		dat, err := json.Marshal(mgs.galleries)
-		if err != nil {
-			return err
-		}
-		log.Debug().Msgf("Adding %+v to gallery list", *input)
-		mgs.galleries = append(mgs.galleries, *input)
-		return c.Send(dat)
-	}
-}
-
-func (mgs *ModelGalleryService) RemoveModelGalleryEndpoint() func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		input := new(gallery.Gallery)
-		// Get input data from the request body
-		if err := c.BodyParser(input); err != nil {
-			return err
-		}
-		if !slices.ContainsFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
-			return gallery.Name == input.Name
-		}) {
-			return fmt.Errorf("%s is not currently registered", input.Name)
-		}
-		mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
-			return gallery.Name == input.Name
-		})
-		return c.Send(nil)
-	}
-}
diff --git a/configuration/.keep b/configuration/.keep
new file mode 100644
index 00000000..e69de29b
diff --git a/core/backend/embeddings.go b/core/backend/embeddings.go
index d8b89e12..0a74ea4c 100644
--- a/core/backend/embeddings.go
+++ b/core/backend/embeddings.go
@@ -3,36 +3,36 @@ package backend
 import (
 	"fmt"
 
-	config "github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/options"
+	"github.com/go-skynet/LocalAI/core/config"
+
 	"github.com/go-skynet/LocalAI/pkg/grpc"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 )
 
-func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.Config, o *options.Option) (func() ([]float32, error), error) {
-	if !c.Embeddings {
+func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
+	if !backendConfig.Embeddings {
 		return nil, fmt.Errorf("endpoint disabled for this model by API configuration")
 	}
 
-	modelFile := c.Model
+	modelFile := backendConfig.Model
 
-	grpcOpts := gRPCModelOpts(c)
+	grpcOpts := gRPCModelOpts(backendConfig)
 
 	var inferenceModel interface{}
 	var err error
 
-	opts := modelOpts(c, o, []model.Option{
+	opts := modelOpts(backendConfig, appConfig, []model.Option{
 		model.WithLoadGRPCLoadModelOpts(grpcOpts),
-		model.WithThreads(uint32(c.Threads)),
-		model.WithAssetDir(o.AssetsDestination),
+		model.WithThreads(uint32(backendConfig.Threads)),
+		model.WithAssetDir(appConfig.AssetsDestination),
 		model.WithModel(modelFile),
-		model.WithContext(o.Context),
+		model.WithContext(appConfig.Context),
 	})
 
-	if c.Backend == "" {
+	if backendConfig.Backend == "" {
 		inferenceModel, err = loader.GreedyLoader(opts...)
 	} else {
-		opts = append(opts, model.WithBackendString(c.Backend))
+		opts = append(opts, model.WithBackendString(backendConfig.Backend))
 		inferenceModel, err = loader.BackendLoader(opts...)
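For orientation, here is a minimal caller sketch against the ModelEmbedding signature introduced in the hunk above. This is an editorial illustration, not part of the patch: the model name and paths are assumed placeholders, and only the signatures visible in this diff are relied on.

    package main

    import (
        "context"
        "fmt"

        "github.com/go-skynet/LocalAI/core/backend"
        "github.com/go-skynet/LocalAI/core/config"
        model "github.com/go-skynet/LocalAI/pkg/model"
    )

    func main() {
        ml := model.NewModelLoader("/models") // assumed models directory
        backendConfig := config.BackendConfig{}
        backendConfig.Model = "bert-embeddings" // hypothetical model name
        backendConfig.Embeddings = true         // ModelEmbedding rejects configs without this flag
        appConfig := config.NewApplicationConfig(config.WithContext(context.Background()))

        // ModelEmbedding returns a closure; the backend gRPC call happens when it runs.
        embedFn, err := backend.ModelEmbedding("hello world", nil, ml, backendConfig, appConfig)
        if err != nil {
            fmt.Println(err)
            return
        }
        embeds, err := embedFn()
        fmt.Println(embeds, err)
    }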
 	}
 
 	if err != nil {
@@ -43,7 +43,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.
 	switch model := inferenceModel.(type) {
 	case grpc.Backend:
 		fn = func() ([]float32, error) {
-			predictOptions := gRPCPredictOpts(c, loader.ModelPath)
+			predictOptions := gRPCPredictOpts(backendConfig, loader.ModelPath)
 
 			if len(tokens) > 0 {
 				embeds := []int32{}
@@ -52,7 +52,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.
 				}
 				predictOptions.EmbeddingTokens = embeds
 
-				res, err := model.Embeddings(o.Context, predictOptions)
+				res, err := model.Embeddings(appConfig.Context, predictOptions)
 				if err != nil {
 					return nil, err
 				}
@@ -61,7 +61,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.
 			}
 			predictOptions.Embeddings = s
 
-			res, err := model.Embeddings(o.Context, predictOptions)
+			res, err := model.Embeddings(appConfig.Context, predictOptions)
 			if err != nil {
 				return nil, err
 			}
diff --git a/core/backend/image.go b/core/backend/image.go
index 12ea57ce..60db48f9 100644
--- a/core/backend/image.go
+++ b/core/backend/image.go
@@ -1,33 +1,33 @@
 package backend
 
 import (
-	config "github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/options"
+	"github.com/go-skynet/LocalAI/core/config"
+
 	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 )
 
-func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, c config.Config, o *options.Option) (func() error, error) {
+func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
 
-	opts := modelOpts(c, o, []model.Option{
-		model.WithBackendString(c.Backend),
-		model.WithAssetDir(o.AssetsDestination),
-		model.WithThreads(uint32(c.Threads)),
-		model.WithContext(o.Context),
-		model.WithModel(c.Model),
+	opts := modelOpts(backendConfig, appConfig, []model.Option{
+		model.WithBackendString(backendConfig.Backend),
+		model.WithAssetDir(appConfig.AssetsDestination),
+		model.WithThreads(uint32(backendConfig.Threads)),
+		model.WithContext(appConfig.Context),
+		model.WithModel(backendConfig.Model),
 		model.WithLoadGRPCLoadModelOpts(&proto.ModelOptions{
-			CUDA:          c.CUDA || c.Diffusers.CUDA,
-			SchedulerType: c.Diffusers.SchedulerType,
-			PipelineType:  c.Diffusers.PipelineType,
-			CFGScale:      c.Diffusers.CFGScale,
-			LoraAdapter:   c.LoraAdapter,
-			LoraScale:     c.LoraScale,
-			LoraBase:      c.LoraBase,
-			IMG2IMG:       c.Diffusers.IMG2IMG,
-			CLIPModel:     c.Diffusers.ClipModel,
-			CLIPSubfolder: c.Diffusers.ClipSubFolder,
-			CLIPSkip:      int32(c.Diffusers.ClipSkip),
-			ControlNet:    c.Diffusers.ControlNet,
+			CUDA:          backendConfig.CUDA || backendConfig.Diffusers.CUDA,
+			SchedulerType: backendConfig.Diffusers.SchedulerType,
+			PipelineType:  backendConfig.Diffusers.PipelineType,
+			CFGScale:      backendConfig.Diffusers.CFGScale,
+			LoraAdapter:   backendConfig.LoraAdapter,
+			LoraScale:     backendConfig.LoraScale,
+			LoraBase:      backendConfig.LoraBase,
+			IMG2IMG:       backendConfig.Diffusers.IMG2IMG,
+			CLIPModel:     backendConfig.Diffusers.ClipModel,
+			CLIPSubfolder: backendConfig.Diffusers.ClipSubFolder,
+			CLIPSkip:      int32(backendConfig.Diffusers.ClipSkip),
+			ControlNet:    backendConfig.Diffusers.ControlNet,
 		}),
 	})
 
@@ -40,19 +40,19 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
 	fn := func() error {
 		_, err := inferenceModel.GenerateImage(
-			o.Context,
+			appConfig.Context,
 			&proto.GenerateImageRequest{
 				Height:           int32(height),
 				Width:            int32(width),
 				Mode:             int32(mode),
 				Step:             int32(step),
 				Seed:             int32(seed),
-				CLIPSkip:         int32(c.Diffusers.ClipSkip),
+				CLIPSkip:         int32(backendConfig.Diffusers.ClipSkip),
 				PositivePrompt:   positive_prompt,
 				NegativePrompt:   negative_prompt,
 				Dst:              dst,
 				Src:              src,
-				EnableParameters: c.Diffusers.EnableParameters,
+				EnableParameters: backendConfig.Diffusers.EnableParameters,
 			})
 		return err
 	}
diff --git a/core/backend/llm.go b/core/backend/llm.go
index d1081ad6..f16878c0 100644
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@@ -8,8 +8,8 @@ import (
 	"sync"
 	"unicode/utf8"
 
-	config "github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/options"
+	"github.com/go-skynet/LocalAI/core/config"
+
 	"github.com/go-skynet/LocalAI/pkg/gallery"
 	"github.com/go-skynet/LocalAI/pkg/grpc"
 	model "github.com/go-skynet/LocalAI/pkg/model"
@@ -26,7 +26,7 @@ type TokenUsage struct {
 	Completion int
 }
 
-func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
+func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
 	modelFile := c.Model
 
 	grpcOpts := gRPCModelOpts(c)
@@ -140,7 +140,7 @@ func ModelInference(ctx context.Context, s string, images []string, loader *mode
 var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
 var mu sync.Mutex = sync.Mutex{}
 
-func Finetune(config config.Config, input, prediction string) string {
+func Finetune(config config.BackendConfig, input, prediction string) string {
 	if config.Echo {
 		prediction = input + prediction
 	}
diff --git a/core/backend/options.go b/core/backend/options.go
index 9710ac17..60160572 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -4,19 +4,17 @@ import (
 	"os"
 	"path/filepath"
 
+	"github.com/go-skynet/LocalAI/core/config"
 	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	model "github.com/go-skynet/LocalAI/pkg/model"
-
-	config "github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/options"
 )
 
-func modelOpts(c config.Config, o *options.Option, opts []model.Option) []model.Option {
-	if o.SingleBackend {
+func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
+	if so.SingleBackend {
 		opts = append(opts, model.WithSingleActiveBackend())
 	}
 
-	if o.ParallelBackendRequests {
+	if so.ParallelBackendRequests {
 		opts = append(opts, model.EnableParallelRequests)
 	}
 
@@ -28,14 +26,14 @@ func modelOpts(c config.Config, o *options.Option, opts []model.Option) []model.
 		opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
 	}
 
-	for k, v := range o.ExternalGRPCBackends {
+	for k, v := range so.ExternalGRPCBackends {
 		opts = append(opts, model.WithExternalBackend(k, v))
 	}
 
 	return opts
 }
 
-func gRPCModelOpts(c config.Config) *pb.ModelOptions {
+func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
 	b := 512
 	if c.Batch != 0 {
 		b = c.Batch
@@ -84,7 +82,7 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
 	}
 }
 
-func gRPCPredictOpts(c config.Config, modelPath string) *pb.PredictOptions {
+func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOptions {
 	promptCachePath := ""
 	if c.PromptCachePath != "" {
 		p := filepath.Join(modelPath, c.PromptCachePath)
diff --git a/core/backend/transcript.go b/core/backend/transcript.go
index 1cbaf820..bbb4f4b4 100644
--- a/core/backend/transcript.go
+++ b/core/backend/transcript.go
@@ -4,25 +4,24 @@ import (
 	"context"
 	"fmt"
 
-	config "github.com/go-skynet/LocalAI/core/config"
+	"github.com/go-skynet/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/core/options"
 	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 )
 
-func ModelTranscription(audio, language string, loader *model.ModelLoader, c config.Config, o *options.Option) (*schema.Result, error) {
+func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.Result, error) {
 
-	opts := modelOpts(c, o, []model.Option{
+	opts := modelOpts(backendConfig, appConfig, []model.Option{
 		model.WithBackendString(model.WhisperBackend),
-		model.WithModel(c.Model),
-		model.WithContext(o.Context),
-		model.WithThreads(uint32(c.Threads)),
-		model.WithAssetDir(o.AssetsDestination),
+		model.WithModel(backendConfig.Model),
+		model.WithContext(appConfig.Context),
+		model.WithThreads(uint32(backendConfig.Threads)),
+		model.WithAssetDir(appConfig.AssetsDestination),
 	})
 
-	whisperModel, err := o.Loader.BackendLoader(opts...)
+	whisperModel, err := ml.BackendLoader(opts...)
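The transcript.go hunk above threads the ModelLoader in explicitly (ml.BackendLoader) instead of reaching through the removed Option.Loader field. A hedged usage sketch against the renamed ModelTranscription, assuming only the post-patch signature shown here; the audio path and whisper model file are illustrative placeholders:

    package main

    import (
        "fmt"

        "github.com/go-skynet/LocalAI/core/backend"
        "github.com/go-skynet/LocalAI/core/config"
        model "github.com/go-skynet/LocalAI/pkg/model"
    )

    func main() {
        ml := model.NewModelLoader("/models") // assumed models directory
        backendConfig := config.BackendConfig{}
        backendConfig.Model = "ggml-whisper-base.bin" // hypothetical whisper weights
        backendConfig.Threads = 4
        appConfig := config.NewApplicationConfig()

        // Loads the whisper backend via the loader passed as a parameter.
        res, err := backend.ModelTranscription("/tmp/audio.wav", "en", ml, backendConfig, appConfig)
        if err != nil {
            fmt.Println(err)
            return
        }
        fmt.Printf("%+v\n", res) // *schema.Result holding the transcription
    }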
 	if err != nil {
 		return nil, err
 	}
@@ -34,6 +33,6 @@ func ModelTranscription(audio, language string, loader *model.ModelLoader, c con
 	return whisperModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
 		Dst:      audio,
 		Language: language,
-		Threads:  uint32(c.Threads),
+		Threads:  uint32(backendConfig.Threads),
 	})
 }
diff --git a/core/backend/tts.go b/core/backend/tts.go
index a9d7153f..85aa3457 100644
--- a/core/backend/tts.go
+++ b/core/backend/tts.go
@@ -6,8 +6,8 @@ import (
 	"os"
 	"path/filepath"
 
-	config "github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/options"
+	"github.com/go-skynet/LocalAI/core/config"
+
 	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/go-skynet/LocalAI/pkg/utils"
@@ -29,22 +29,22 @@ func generateUniqueFileName(dir, baseName, ext string) string {
 	}
 }
 
-func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *options.Option, c config.Config) (string, *proto.Result, error) {
+func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (string, *proto.Result, error) {
 	bb := backend
 	if bb == "" {
 		bb = model.PiperBackend
 	}
 
-	grpcOpts := gRPCModelOpts(c)
+	grpcOpts := gRPCModelOpts(backendConfig)
 
-	opts := modelOpts(config.Config{}, o, []model.Option{
+	opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
 		model.WithBackendString(bb),
 		model.WithModel(modelFile),
-		model.WithContext(o.Context),
-		model.WithAssetDir(o.AssetsDestination),
+		model.WithContext(appConfig.Context),
+		model.WithAssetDir(appConfig.AssetsDestination),
 		model.WithLoadGRPCLoadModelOpts(grpcOpts),
 	})
 
-	piperModel, err := o.Loader.BackendLoader(opts...)
+	piperModel, err := loader.BackendLoader(opts...)
 	if err != nil {
 		return "", nil, err
 	}
@@ -53,19 +53,19 @@ func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *opt
 		return "", nil, fmt.Errorf("could not load piper model")
 	}
 
-	if err := os.MkdirAll(o.AudioDir, 0755); err != nil {
+	if err := os.MkdirAll(appConfig.AudioDir, 0755); err != nil {
 		return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
 	}
 
-	fileName := generateUniqueFileName(o.AudioDir, "piper", ".wav")
-	filePath := filepath.Join(o.AudioDir, fileName)
+	fileName := generateUniqueFileName(appConfig.AudioDir, "piper", ".wav")
+	filePath := filepath.Join(appConfig.AudioDir, fileName)
 
 	// If the model file is not empty, we pass it joined with the model path
 	modelPath := ""
 	if modelFile != "" {
 		if bb != model.TransformersMusicGen {
-			modelPath = filepath.Join(o.Loader.ModelPath, modelFile)
-			if err := utils.VerifyPath(modelPath, o.Loader.ModelPath); err != nil {
+			modelPath = filepath.Join(loader.ModelPath, modelFile)
+			if err := utils.VerifyPath(modelPath, appConfig.ModelPath); err != nil {
 				return "", nil, err
 			}
 		} else {
diff --git a/core/options/options.go b/core/config/application_config.go
similarity index 69%
rename from core/options/options.go
rename to core/config/application_config.go
index 72aea1a3..d90ae906 100644
--- a/core/options/options.go
+++ b/core/config/application_config.go
@@ -1,4 +1,4 @@
-package options
+package config
 
 import (
 	"context"
@@ -6,16 +6,14 @@ import (
 	"encoding/json"
 	"time"
 
-	"github.com/go-skynet/LocalAI/metrics"
 	"github.com/go-skynet/LocalAI/pkg/gallery"
-	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 )
 
-type Option struct {
+type ApplicationConfig struct {
 	Context                             context.Context
 	ConfigFile                          string
-	Loader                              *model.ModelLoader
+	ModelPath                           string
 	UploadLimitMB, Threads, ContextSize int
 	F16                                 bool
 	Debug, DisableMessage               bool
@@ -27,7 +25,6 @@ type Option struct {
 	PreloadModelsFromPath               string
 	CORSAllowOrigins                    string
 	ApiKeys                             []string
-	Metrics                             *metrics.Metrics
 
 	ModelLibraryURL string
 
@@ -52,10 +49,10 @@ type Option struct {
 	WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration
 }
 
-type AppOption func(*Option)
+type AppOption func(*ApplicationConfig)
 
-func NewOptions(o ...AppOption) *Option {
-	opt := &Option{
+func NewApplicationConfig(o ...AppOption) *ApplicationConfig {
+	opt := &ApplicationConfig{
 		Context:       context.Background(),
 		UploadLimitMB: 15,
 		Threads:       1,
@@ -70,63 +67,69 @@ func NewOptions(o ...AppOption) *Option {
 }
 
 func WithModelsURL(urls ...string) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.ModelsURL = urls
 	}
 }
 
+func WithModelPath(path string) AppOption {
+	return func(o *ApplicationConfig) {
+		o.ModelPath = path
+	}
+}
+
 func WithCors(b bool) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.CORS = b
 	}
 }
 
 func WithModelLibraryURL(url string) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.ModelLibraryURL = url
 	}
 }
 
-var EnableWatchDog = func(o *Option) {
+var EnableWatchDog = func(o *ApplicationConfig) {
 	o.WatchDog = true
 }
 
-var EnableWatchDogIdleCheck = func(o *Option) {
+var EnableWatchDogIdleCheck = func(o *ApplicationConfig) {
 	o.WatchDog = true
 	o.WatchDogIdle = true
 }
 
-var EnableWatchDogBusyCheck = func(o *Option) {
+var EnableWatchDogBusyCheck = func(o *ApplicationConfig) {
 	o.WatchDog = true
 	o.WatchDogBusy = true
 }
 
 func SetWatchDogBusyTimeout(t time.Duration) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.WatchDogBusyTimeout = t
 	}
 }
 
 func SetWatchDogIdleTimeout(t time.Duration) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.WatchDogIdleTimeout = t
 	}
 }
 
-var EnableSingleBackend = func(o *Option) {
+var EnableSingleBackend = func(o *ApplicationConfig) {
 	o.SingleBackend = true
 }
 
-var EnableParallelBackendRequests = func(o *Option) {
+var EnableParallelBackendRequests = func(o *ApplicationConfig) {
 	o.ParallelBackendRequests = true
 }
 
-var EnableGalleriesAutoload = func(o *Option) {
+var EnableGalleriesAutoload = func(o *ApplicationConfig) {
 	o.AutoloadGalleries = true
 }
 
 func WithExternalBackend(name string, uri string) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		if o.ExternalGRPCBackends == nil {
 			o.ExternalGRPCBackends = make(map[string]string)
 		}
@@ -135,25 +138,25 @@ func WithExternalBackend(name string, uri string) AppOption {
 }
 
 func WithCorsAllowOrigins(b string) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.CORSAllowOrigins = b
 	}
 }
 
 func WithBackendAssetsOutput(out string) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.AssetsDestination = out
 	}
 }
 
 func WithBackendAssets(f embed.FS) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.BackendAssets = f
 	}
 }
 
 func WithStringGalleries(galls string) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		if galls == "" {
 			log.Debug().Msgf("no galleries to load")
 			o.Galleries = []gallery.Gallery{}
@@ -168,102 +171,96 @@ func WithStringGalleries(galls string) AppOption {
 }
 
 func WithGalleries(galleries []gallery.Gallery) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.Galleries = append(o.Galleries, galleries...)
 	}
 }
 
 func WithContext(ctx context.Context) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.Context = ctx
 	}
 }
 
 func WithYAMLConfigPreload(configFile string) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.PreloadModelsFromPath = configFile
 	}
 }
 
 func WithJSONStringPreload(configFile string) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.PreloadJSONModels = configFile
 	}
 }
 
 func WithConfigFile(configFile string) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.ConfigFile = configFile
 	}
 }
 
-func WithModelLoader(loader *model.ModelLoader) AppOption {
-	return func(o *Option) {
-		o.Loader = loader
-	}
-}
-
 func WithUploadLimitMB(limit int) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.UploadLimitMB = limit
 	}
 }
 
 func WithThreads(threads int) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.Threads = threads
 	}
 }
 
 func WithContextSize(ctxSize int) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.ContextSize = ctxSize
 	}
 }
 
 func WithF16(f16 bool) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.F16 = f16
 	}
 }
 
 func WithDebug(debug bool) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.Debug = debug
 	}
 }
 
 func WithDisableMessage(disableMessage bool) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.DisableMessage = disableMessage
 	}
 }
 
 func WithAudioDir(audioDir string) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.AudioDir = audioDir
 	}
 }
 
 func WithImageDir(imageDir string) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.ImageDir = imageDir
 	}
 }
 
 func WithUploadDir(uploadDir string) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.UploadDir = uploadDir
 	}
 }
 
 func WithApiKeys(apiKeys []string) AppOption {
-	return func(o *Option) {
+	return func(o *ApplicationConfig) {
 		o.ApiKeys = apiKeys
 	}
 }
 
-func WithMetrics(meter *metrics.Metrics) AppOption {
-	return func(o *Option) {
-		o.Metrics = meter
-	}
-}
+// func WithMetrics(meter *metrics.Metrics) AppOption {
+// 	return func(o *StartupOptions) {
+// 		o.Metrics = meter
+// 	}
+// }
diff --git a/core/config/config.go b/core/config/backend_config.go
similarity index 77%
rename from core/config/config.go
rename to core/config/backend_config.go
index af203ecc..3098da86 100644
--- a/core/config/config.go
+++ b/core/config/backend_config.go
@@ -9,15 +9,16 @@ import (
 	"strings"
 	"sync"
 
+	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/go-skynet/LocalAI/pkg/downloader"
 	"github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v3"
 )
 
-type Config struct {
-	PredictionOptions `yaml:"parameters"`
-	Name              string `yaml:"name"`
+type BackendConfig struct {
+	schema.PredictionOptions `yaml:"parameters"`
+	Name                     string `yaml:"name"`
 
 	F16     bool `yaml:"f16"`
 	Threads int  `yaml:"threads"`
@@ -159,37 +160,55 @@ type TemplateConfig struct {
 	Functions string `yaml:"function"`
 }
 
-type ConfigLoader struct {
-	configs map[string]Config
-	sync.Mutex
-}
-
-func (c *Config) SetFunctionCallString(s string) {
+func (c *BackendConfig) SetFunctionCallString(s string) {
 	c.functionCallString = s
 }
 
-func (c *Config) SetFunctionCallNameString(s string) {
+func (c *BackendConfig) SetFunctionCallNameString(s string) {
 	c.functionCallNameString = s
 }
 
-func (c *Config) ShouldUseFunctions() bool {
+func (c *BackendConfig) ShouldUseFunctions() bool {
 	return ((c.functionCallString != "none" || c.functionCallString == "") || c.ShouldCallSpecificFunction())
 }
 
-func (c *Config) ShouldCallSpecificFunction() bool {
+func (c *BackendConfig) ShouldCallSpecificFunction() bool {
 	return len(c.functionCallNameString) > 0
 }
 
-func (c *Config) FunctionToCall() string {
+func (c *BackendConfig) FunctionToCall() string {
 	return c.functionCallNameString
 }
 
+func defaultPredictOptions(modelFile string) schema.PredictionOptions {
+	return schema.PredictionOptions{
+		TopP:        0.7,
+		TopK:        80,
+		Maxtokens:   512,
+		Temperature: 0.9,
+		Model:       modelFile,
+	}
+}
+
+func DefaultConfig(modelFile string) *BackendConfig {
+	return &BackendConfig{
+		PredictionOptions: defaultPredictOptions(modelFile),
+	}
+}
+
+////// Config Loader ////////
+
+type BackendConfigLoader struct {
+	configs map[string]BackendConfig
+	sync.Mutex
+}
+
 // Load a config file for a model
-func Load(modelName, modelPath string, cm *ConfigLoader, debug bool, threads, ctx int, f16 bool) (*Config, error) {
+func LoadBackendConfigFileByName(modelName, modelPath string, cl *BackendConfigLoader, debug bool, threads, ctx int, f16 bool) (*BackendConfig, error) {
 	// Load a config file if present after the model name
 	modelConfig := filepath.Join(modelPath, modelName+".yaml")
 
-	var cfg *Config
+	var cfg *BackendConfig
 
 	defaults := func() {
 		cfg = DefaultConfig(modelName)
@@ -199,13 +218,13 @@ func Load(modelName, modelPath string, cm *ConfigLoader, debug bool, threads, ct
 		cfg.Debug = debug
 	}
 
-	cfgExisting, exists := cm.GetConfig(modelName)
+	cfgExisting, exists := cl.GetBackendConfig(modelName)
 	if !exists {
 		if _, err := os.Stat(modelConfig); err == nil {
-			if err := cm.LoadConfig(modelConfig); err != nil {
+			if err := cl.LoadBackendConfig(modelConfig); err != nil {
 				return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
 			}
-			cfgExisting, exists = cm.GetConfig(modelName)
+			cfgExisting, exists = cl.GetBackendConfig(modelName)
 			if exists {
 				cfg = &cfgExisting
 			} else {
@@ -238,29 +257,13 @@ func Load(modelName, modelPath string, cm *ConfigLoader, debug bool, threads, ct
 	return cfg, nil
 }
 
-func defaultPredictOptions(modelFile string) PredictionOptions {
-	return PredictionOptions{
-		TopP:        0.7,
-		TopK:        80,
-		Maxtokens:   512,
-		Temperature: 0.9,
-		Model:       modelFile,
+func NewBackendConfigLoader() *BackendConfigLoader {
+	return &BackendConfigLoader{
+		configs: make(map[string]BackendConfig),
 	}
 }
-
-func DefaultConfig(modelFile string) *Config {
-	return &Config{
-		PredictionOptions: defaultPredictOptions(modelFile),
-	}
-}
-
-func NewConfigLoader() *ConfigLoader {
-	return &ConfigLoader{
-		configs: make(map[string]Config),
-	}
-}
-func ReadConfigFile(file string) ([]*Config, error) {
-	c := &[]*Config{}
+
+func ReadBackendConfigFile(file string) ([]*BackendConfig, error) {
+	c := &[]*BackendConfig{}
 	f, err := os.ReadFile(file)
 	if err != nil {
 		return nil, fmt.Errorf("cannot read config file: %w", err)
@@ -272,8 +275,8 @@ func ReadConfigFile(file string) ([]*Config, error) {
 	return *c, nil
 }
 
-func ReadConfig(file string) (*Config, error) {
-	c := &Config{}
+func ReadBackendConfig(file string) (*BackendConfig, error) {
+	c := &BackendConfig{}
 	f, err := os.ReadFile(file)
 	if err != nil {
 		return nil, fmt.Errorf("cannot read config file: %w", err)
@@ -285,10 +288,10 @@ func ReadConfig(file string) (*Config, error) {
 	return c, nil
 }
 
-func (cm *ConfigLoader) LoadConfigFile(file string) error {
+func (cm *BackendConfigLoader) LoadBackendConfigFile(file string) error {
 	cm.Lock()
 	defer cm.Unlock()
-	c, err := ReadConfigFile(file)
+	c, err := ReadBackendConfigFile(file)
 	if err != nil {
 		return fmt.Errorf("cannot load config file: %w", err)
 	}
@@ -299,49 +302,49 @@ func (cm *ConfigLoader) LoadConfigFile(file string) error {
 	return nil
 }
 
-func (cm *ConfigLoader) LoadConfig(file string) error {
-	cm.Lock()
-	defer cm.Unlock()
-	c, err := ReadConfig(file)
+func (cl *BackendConfigLoader) LoadBackendConfig(file string) error {
+	cl.Lock()
+	defer cl.Unlock()
+	c, err := ReadBackendConfig(file)
 	if err != nil {
 		return fmt.Errorf("cannot read config file: %w", err)
 	}
 
-	cm.configs[c.Name] = *c
+	cl.configs[c.Name] = *c
 	return nil
 }
 
-func (cm *ConfigLoader) GetConfig(m string) (Config, bool) {
-	cm.Lock()
-	defer cm.Unlock()
-	v, exists := cm.configs[m]
+func (cl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) {
+	cl.Lock()
+	defer cl.Unlock()
+	v, exists := cl.configs[m]
 	return v, exists
 }
 
-func (cm *ConfigLoader) GetAllConfigs() []Config {
-	cm.Lock()
-	defer cm.Unlock()
-	var res []Config
-	for _, v := range cm.configs {
+func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
+	cl.Lock()
+	defer cl.Unlock()
+	var res []BackendConfig
+	for _, v := range cl.configs {
 		res = append(res, v)
 	}
 	return res
 }
 
-func (cm *ConfigLoader) ListConfigs() []string {
-	cm.Lock()
-	defer cm.Unlock()
+func (cl *BackendConfigLoader) ListBackendConfigs() []string {
+	cl.Lock()
+	defer cl.Unlock()
 	var res []string
-	for k := range cm.configs {
+	for k := range cl.configs {
 		res = append(res, k)
 	}
 	return res
}
 
 // Preload prepare models if they are not local but url or huggingface repositories
-func (cm *ConfigLoader) Preload(modelPath string) error {
-	cm.Lock()
-	defer cm.Unlock()
+func (cl *BackendConfigLoader) Preload(modelPath string) error {
+	cl.Lock()
+	defer cl.Unlock()
 
 	status := func(fileName, current, total string, percent float64) {
 		utils.DisplayDownloadFunction(fileName, current, total, percent)
@@ -349,7 +352,7 @@ func (cm *ConfigLoader) Preload(modelPath string) error {
 
 	log.Info().Msgf("Preloading models from %s", modelPath)
 
-	for i, config := range cm.configs {
+	for i, config := range cl.configs {
 
 		// Download files and verify their SHA
 		for _, file := range config.DownloadFiles {
@@ -381,25 +384,25 @@ func (cm *ConfigLoader) Preload(modelPath string) error {
 			}
 		}
 
-			cc := cm.configs[i]
+			cc := cl.configs[i]
 			c := &cc
 			c.PredictionOptions.Model = md5Name
-			cm.configs[i] = *c
+			cl.configs[i] = *c
 		}
 
-		if cm.configs[i].Name != "" {
-			log.Info().Msgf("Model name: %s", cm.configs[i].Name)
+		if cl.configs[i].Name != "" {
+			log.Info().Msgf("Model name: %s", cl.configs[i].Name)
 		}
-		if cm.configs[i].Description != "" {
-			log.Info().Msgf("Model description: %s", cm.configs[i].Description)
+		if cl.configs[i].Description != "" {
+			log.Info().Msgf("Model description: %s", cl.configs[i].Description)
 		}
-		if cm.configs[i].Usage != "" {
-			log.Info().Msgf("Model usage: \n%s", cm.configs[i].Usage)
+		if cl.configs[i].Usage != "" {
+			log.Info().Msgf("Model usage: \n%s", cl.configs[i].Usage)
 		}
 	}
 	return nil
 }
 
-func (cm *ConfigLoader) LoadConfigs(path string) error {
+func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string) error {
 	cm.Lock()
 	defer cm.Unlock()
 	entries, err := os.ReadDir(path)
@@ -419,7 +422,7 @@ func (cm *ConfigLoader) LoadConfigs(path string) error {
 		if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") {
 			continue
 		}
-		c, err := ReadConfig(filepath.Join(path, file.Name()))
+		c, err := ReadBackendConfig(filepath.Join(path, file.Name()))
 		if err == nil {
 			cm.configs[c.Name] = *c
 		}
diff --git a/core/config/config_test.go b/core/config/config_test.go
index d1e92d5c..b18e083f 100644
--- a/core/config/config_test.go
+++ b/core/config/config_test.go
@@ -4,8 +4,7 @@ import (
 	"os"
 
 	. "github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/options"
-	"github.com/go-skynet/LocalAI/pkg/model"
+
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 )
@@ -19,7 +18,7 @@ var _ = Describe("Test cases for config related functions", func() {
 	Context("Test Read configuration functions", func() {
 		configFile = os.Getenv("CONFIG_FILE")
 		It("Test ReadConfigFile", func() {
-			config, err := ReadConfigFile(configFile)
+			config, err := ReadBackendConfigFile(configFile)
 			Expect(err).To(BeNil())
 			Expect(config).ToNot(BeNil())
 			// two configs in config.yaml
@@ -28,29 +27,26 @@ var _ = Describe("Test cases for config related functions", func() {
 		})
 
 		It("Test LoadConfigs", func() {
-			cm := NewConfigLoader()
-			opts := options.NewOptions()
-			modelLoader := model.NewModelLoader(os.Getenv("MODELS_PATH"))
-			options.WithModelLoader(modelLoader)(opts)
-
-			err := cm.LoadConfigs(opts.Loader.ModelPath)
+			cm := NewBackendConfigLoader()
+			opts := NewApplicationConfig()
+			err := cm.LoadBackendConfigsFromPath(opts.ModelPath)
 			Expect(err).To(BeNil())
-			Expect(cm.ListConfigs()).ToNot(BeNil())
+			Expect(cm.ListBackendConfigs()).ToNot(BeNil())
 
 			// config should includes gpt4all models's api.config
-			Expect(cm.ListConfigs()).To(ContainElements("gpt4all"))
+			Expect(cm.ListBackendConfigs()).To(ContainElements("gpt4all"))
 
 			// config should includes gpt2 models's api.config
-			Expect(cm.ListConfigs()).To(ContainElements("gpt4all-2"))
+			Expect(cm.ListBackendConfigs()).To(ContainElements("gpt4all-2"))
 
 			// config should includes text-embedding-ada-002 models's api.config
-			Expect(cm.ListConfigs()).To(ContainElements("text-embedding-ada-002"))
+			Expect(cm.ListBackendConfigs()).To(ContainElements("text-embedding-ada-002"))
 
 			// config should includes rwkv_test models's api.config
-			Expect(cm.ListConfigs()).To(ContainElements("rwkv_test"))
+			Expect(cm.ListBackendConfigs()).To(ContainElements("rwkv_test"))
 
 			// config should includes whisper-1 models's api.config
-			Expect(cm.ListConfigs()).To(ContainElements("whisper-1"))
+			Expect(cm.ListBackendConfigs()).To(ContainElements("whisper-1"))
 		})
 	})
 })
diff --git a/core/http/api.go b/core/http/api.go
index 7d228152..e2646a14 100644
--- a/core/http/api.go
+++ b/core/http/api.go
@@ -3,122 +3,29 @@ package http
 import (
 	"encoding/json"
 	"errors"
-	"fmt"
 	"os"
 	"strings"
 
-	"github.com/go-skynet/LocalAI/api/localai"
-	"github.com/go-skynet/LocalAI/api/openai"
-	config "github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/options"
+	"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
+	"github.com/go-skynet/LocalAI/core/http/endpoints/openai"
+
+	"github.com/go-skynet/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/core/schema"
+	"github.com/go-skynet/LocalAI/core/services"
 	"github.com/go-skynet/LocalAI/internal"
-	"github.com/go-skynet/LocalAI/metrics"
-	"github.com/go-skynet/LocalAI/pkg/assets"
 	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/startup"
 
 	"github.com/gofiber/fiber/v2"
 	"github.com/gofiber/fiber/v2/middleware/cors"
 	"github.com/gofiber/fiber/v2/middleware/logger"
 	"github.com/gofiber/fiber/v2/middleware/recover"
-	"github.com/rs/zerolog"
-	"github.com/rs/zerolog/log"
 )
 
-func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader, error) {
-	options := options.NewOptions(opts...)
-
-	zerolog.SetGlobalLevel(zerolog.InfoLevel)
-	if options.Debug {
-		zerolog.SetGlobalLevel(zerolog.DebugLevel)
-	}
-
-	log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.Loader.ModelPath)
-	log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
-
-	startup.PreloadModelsConfigurations(options.ModelLibraryURL, options.Loader.ModelPath, options.ModelsURL...)
-
-	cl := config.NewConfigLoader()
-	if err := cl.LoadConfigs(options.Loader.ModelPath); err != nil {
-		log.Error().Msgf("error loading config files: %s", err.Error())
-	}
-
-	if options.ConfigFile != "" {
-		if err := cl.LoadConfigFile(options.ConfigFile); err != nil {
-			log.Error().Msgf("error loading config file: %s", err.Error())
-		}
-	}
-
-	if err := cl.Preload(options.Loader.ModelPath); err != nil {
-		log.Error().Msgf("error downloading models: %s", err.Error())
-	}
-
-	if options.PreloadJSONModels != "" {
-		if err := localai.ApplyGalleryFromString(options.Loader.ModelPath, options.PreloadJSONModels, cl, options.Galleries); err != nil {
-			return nil, nil, err
-		}
-	}
-
-	if options.PreloadModelsFromPath != "" {
-		if err := localai.ApplyGalleryFromFile(options.Loader.ModelPath, options.PreloadModelsFromPath, cl, options.Galleries); err != nil {
-			return nil, nil, err
-		}
-	}
-
-	if options.Debug {
-		for _, v := range cl.ListConfigs() {
-			cfg, _ := cl.GetConfig(v)
-			log.Debug().Msgf("Model: %s (config: %+v)", v, cfg)
-		}
-	}
-
-	if options.AssetsDestination != "" {
-		// Extract files from the embedded FS
-		err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
-		log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
-		if err != nil {
-			log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
-		}
-	}
-
-	// turn off any process that was started by GRPC if the context is canceled
-	go func() {
-		<-options.Context.Done()
-		log.Debug().Msgf("Context canceled, shutting down")
-		options.Loader.StopAllGRPC()
-	}()
-
-	if options.WatchDog {
-		wd := model.NewWatchDog(
-			options.Loader,
-			options.WatchDogBusyTimeout,
-			options.WatchDogIdleTimeout,
-			options.WatchDogBusy,
-			options.WatchDogIdle)
-		options.Loader.SetWatchDog(wd)
-		go wd.Run()
-		go func() {
-			<-options.Context.Done()
-			log.Debug().Msgf("Context canceled, shutting down")
-			wd.Shutdown()
-		}()
-	}
-
-	return options, cl, nil
-}
-
-func App(opts ...options.AppOption) (*fiber.App, error) {
-
-	options, cl, err := Startup(opts...)
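The deleted `options, cl, err := Startup(opts...)` call immediately above is the hinge of this refactor: option parsing, config loading, and watchdog setup move out of the HTTP package into a startup package, and App now receives the loaders and config as plain arguments. A hedged sketch of the new wiring, mirroring the call pattern used by the api_test.go changes later in this patch; the address and model path are placeholders:

    package main

    import (
        "context"
        "log"

        "github.com/go-skynet/LocalAI/core/config"
        "github.com/go-skynet/LocalAI/core/http"
        "github.com/go-skynet/LocalAI/core/startup"
    )

    func main() {
        // startup.Startup replaces the deleted http.Startup shown above and
        // returns the config loader, model loader, and application config.
        cl, ml, appConfig, err := startup.Startup(
            config.WithContext(context.Background()),
            config.WithModelPath("/models"), // placeholder path
            config.WithDebug(true),
        )
        if err != nil {
            log.Fatal(err)
        }

        app, err := http.App(cl, ml, appConfig)
        if err != nil {
            log.Fatal(err)
        }
        log.Fatal(app.Listen("127.0.0.1:8080"))
    }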
-	if err != nil {
-		return nil, fmt.Errorf("failed basic startup tasks with error %s", err.Error())
-	}
-
+func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
 	// Return errors as JSON responses
 	app := fiber.New(fiber.Config{
-		BodyLimit:             options.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
-		DisableStartupMessage: options.DisableMessage,
+		BodyLimit:             appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
+		DisableStartupMessage: appConfig.DisableMessage,
 		// Override default error handler
 		ErrorHandler: func(ctx *fiber.Ctx, err error) error {
 			// Status code defaults to 500
@@ -139,7 +46,7 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
 		},
 	})
 
-	if options.Debug {
+	if appConfig.Debug {
 		app.Use(logger.New(logger.Config{
 			Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
 		}))
@@ -147,17 +54,25 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
 
 	// Default middleware config
 
-	if !options.Debug {
+	if !appConfig.Debug {
 		app.Use(recover.New())
 	}
 
-	if options.Metrics != nil {
-		app.Use(metrics.APIMiddleware(options.Metrics))
+	metricsService, err := services.NewLocalAIMetricsService()
+	if err != nil {
+		return nil, err
+	}
+
+	if metricsService != nil {
+		app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
+		app.Hooks().OnShutdown(func() error {
+			return metricsService.Shutdown()
+		})
 	}
 
 	// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
 	auth := func(c *fiber.Ctx) error {
-		if len(options.ApiKeys) == 0 {
+		if len(appConfig.ApiKeys) == 0 {
 			return c.Next()
 		}
 
@@ -172,10 +87,10 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
 		}
 
 		// Add file keys to options.ApiKeys
-		options.ApiKeys = append(options.ApiKeys, fileKeys...)
+		appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...)
 		}
 
-		if len(options.ApiKeys) == 0 {
+		if len(appConfig.ApiKeys) == 0 {
 			return c.Next()
 		}
 
@@ -189,7 +104,7 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
 		}
 
 		apiKey := authHeaderParts[1]
-		for _, key := range options.ApiKeys {
+		for _, key := range appConfig.ApiKeys {
 			if apiKey == key {
 				return c.Next()
 			}
@@ -199,20 +114,20 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
 
 	}
 
-	if options.CORS {
+	if appConfig.CORS {
 		var c func(ctx *fiber.Ctx) error
-		if options.CORSAllowOrigins == "" {
+		if appConfig.CORSAllowOrigins == "" {
 			c = cors.New()
 		} else {
-			c = cors.New(cors.Config{AllowOrigins: options.CORSAllowOrigins})
+			c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins})
 		}
 
 		app.Use(c)
 	}
 
 	// LocalAI API endpoints
-	galleryService := localai.NewGalleryService(options.Loader.ModelPath)
-	galleryService.Start(options.Context, cl)
+	galleryService := services.NewGalleryService(appConfig.ModelPath)
+	galleryService.Start(appConfig.Context, cl)
 
 	app.Get("/version", auth, func(c *fiber.Ctx) error {
 		return c.JSON(struct {
@@ -220,69 +135,63 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
 		}{Version: internal.PrintableVersion()})
 	})
 
-	// Make sure directories exists
-	os.MkdirAll(options.ImageDir, 0755)
-	os.MkdirAll(options.AudioDir, 0755)
-	os.MkdirAll(options.UploadDir, 0755)
-	os.MkdirAll(options.Loader.ModelPath, 0755)
-
 	// Load upload json
-	openai.LoadUploadConfig(options.UploadDir)
+	openai.LoadUploadConfig(appConfig.UploadDir)
 
-	modelGalleryService := localai.CreateModelGalleryService(options.Galleries, options.Loader.ModelPath, galleryService)
-	app.Post("/models/apply", auth, modelGalleryService.ApplyModelGalleryEndpoint())
-	app.Get("/models/available", auth, modelGalleryService.ListModelFromGalleryEndpoint())
-	app.Get("/models/galleries", auth, modelGalleryService.ListModelGalleriesEndpoint())
-	app.Post("/models/galleries", auth, modelGalleryService.AddModelGalleryEndpoint())
-	app.Delete("/models/galleries", auth, modelGalleryService.RemoveModelGalleryEndpoint())
-	app.Get("/models/jobs/:uuid", auth, modelGalleryService.GetOpStatusEndpoint())
-	app.Get("/models/jobs", auth, modelGalleryService.GetAllStatusEndpoint())
+	modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
+	app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
+	app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
+	app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint())
+	app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint())
+	app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint())
+	app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint())
+	app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint())
 
 	// openAI compatible API endpoint
 
 	// chat
-	app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, options))
-	app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, options))
+	app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
+	app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
 
 	// edit
-	app.Post("/v1/edits", auth, openai.EditEndpoint(cl, options))
-	app.Post("/edits", auth, openai.EditEndpoint(cl, options))
+	app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
+	app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
 
 	// files
-	app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, options))
-	app.Post("/files", auth, openai.UploadFilesEndpoint(cl, options))
-	app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, options))
-	app.Get("/files", auth, openai.ListFilesEndpoint(cl, options))
-	app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, options))
-	app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, options))
-	app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, options))
-	app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, options))
-	app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, options))
-	app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, options))
+	app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
+	app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
+	app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig))
+	app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig))
+	app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
+	app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
+	app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
+	app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
+	app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
+	app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
 
 	// completion
-	app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, options))
-	app.Post("/completions", auth, openai.CompletionEndpoint(cl, options))
-	app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, options))
+	app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
+	app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
+	app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
 
 	// embeddings
-	app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, options))
-	app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, options))
-	app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, options))
+	app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
+	app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
+	app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
 
 	// audio
-	app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, options))
-	app.Post("/tts", auth, localai.TTSEndpoint(cl, options))
+	app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig))
+	app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig))
 
 	// images
-	app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, options))
+	app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig))
 
-	if options.ImageDir != "" {
-		app.Static("/generated-images", options.ImageDir)
+	if appConfig.ImageDir != "" {
+		app.Static("/generated-images", appConfig.ImageDir)
 	}
 
-	if options.AudioDir != "" {
-		app.Static("/generated-audio", options.AudioDir)
+	if appConfig.AudioDir != "" {
+		app.Static("/generated-audio", appConfig.AudioDir)
 	}
 
 	ok := func(c *fiber.Ctx) error {
@@ -294,15 +203,15 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
app.Get("/readyz", ok) // Experimental Backend Statistics Module - backendMonitor := localai.NewBackendMonitor(cl, options) // Split out for now + backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitor)) app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitor)) // models - app.Get("/v1/models", auth, openai.ListModelsEndpoint(options.Loader, cl)) - app.Get("/models", auth, openai.ListModelsEndpoint(options.Loader, cl)) + app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml)) + app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml)) - app.Get("/metrics", metrics.MetricsHandler()) + app.Get("/metrics", localai.LocalAIMetricsEndpoint()) return app, nil } diff --git a/core/http/api_test.go b/core/http/api_test.go index 9068b393..8f3cfc91 100644 --- a/core/http/api_test.go +++ b/core/http/api_test.go @@ -13,9 +13,10 @@ import ( "path/filepath" "runtime" + "github.com/go-skynet/LocalAI/core/config" . "github.com/go-skynet/LocalAI/core/http" - "github.com/go-skynet/LocalAI/core/options" - "github.com/go-skynet/LocalAI/metrics" + "github.com/go-skynet/LocalAI/core/startup" + "github.com/go-skynet/LocalAI/pkg/downloader" "github.com/go-skynet/LocalAI/pkg/gallery" "github.com/go-skynet/LocalAI/pkg/model" @@ -127,25 +128,33 @@ var backendAssets embed.FS var _ = Describe("API test", func() { var app *fiber.App - var modelLoader *model.ModelLoader var client *openai.Client var client2 *openaigo.Client var c context.Context var cancel context.CancelFunc var tmpdir string + var modelDir string + var bcl *config.BackendConfigLoader + var ml *model.ModelLoader + var applicationConfig *config.ApplicationConfig - commonOpts := []options.AppOption{ - options.WithDebug(true), - options.WithDisableMessage(true), + commonOpts := []config.AppOption{ + config.WithDebug(true), + config.WithDisableMessage(true), } Context("API with ephemeral models", func() { - BeforeEach(func() { + + BeforeEach(func(sc SpecContext) { var err error tmpdir, err = os.MkdirTemp("", "") Expect(err).ToNot(HaveOccurred()) - modelLoader = model.NewModelLoader(tmpdir) + modelDir = filepath.Join(tmpdir, "models") + backendAssetsDir := filepath.Join(tmpdir, "backend-assets") + err = os.Mkdir(backendAssetsDir, 0755) + Expect(err).ToNot(HaveOccurred()) + c, cancel = context.WithCancel(context.Background()) g := []gallery.GalleryModel{ @@ -172,16 +181,18 @@ var _ = Describe("API test", func() { }, } - metricsService, err := metrics.SetupMetrics() + bcl, ml, applicationConfig, err = startup.Startup( + append(commonOpts, + config.WithContext(c), + config.WithGalleries(galleries), + config.WithModelPath(modelDir), + config.WithBackendAssets(backendAssets), + config.WithBackendAssetsOutput(backendAssetsDir))...) Expect(err).ToNot(HaveOccurred()) - app, err = App( - append(commonOpts, - options.WithMetrics(metricsService), - options.WithContext(c), - options.WithGalleries(galleries), - options.WithModelLoader(modelLoader), options.WithBackendAssets(backendAssets), options.WithBackendAssetsOutput(tmpdir))...) 
+ app, err = App(bcl, ml, applicationConfig) Expect(err).ToNot(HaveOccurred()) + go app.Listen("127.0.0.1:9090") defaultConfig := openai.DefaultConfig("") @@ -198,15 +209,21 @@ var _ = Describe("API test", func() { }, "2m").ShouldNot(HaveOccurred()) }) - AfterEach(func() { + AfterEach(func(sc SpecContext) { cancel() - app.Shutdown() - os.RemoveAll(tmpdir) + if app != nil { + err := app.Shutdown() + Expect(err).ToNot(HaveOccurred()) + } + err := os.RemoveAll(tmpdir) + Expect(err).ToNot(HaveOccurred()) + _, err = os.ReadDir(tmpdir) + Expect(err).To(HaveOccurred()) }) Context("Applying models", func() { - It("applies models from a gallery", func() { + It("applies models from a gallery", func() { models := getModels("http://127.0.0.1:9090/models/available") Expect(len(models)).To(Equal(2), fmt.Sprint(models)) Expect(models[0].Installed).To(BeFalse(), fmt.Sprint(models)) @@ -228,10 +245,10 @@ var _ = Describe("API test", func() { }, "360s", "10s").Should(Equal(true)) Expect(resp["message"]).ToNot(ContainSubstring("error")) - dat, err := os.ReadFile(filepath.Join(tmpdir, "bert2.yaml")) + dat, err := os.ReadFile(filepath.Join(modelDir, "bert2.yaml")) Expect(err).ToNot(HaveOccurred()) - _, err = os.ReadFile(filepath.Join(tmpdir, "foo.yaml")) + _, err = os.ReadFile(filepath.Join(modelDir, "foo.yaml")) Expect(err).ToNot(HaveOccurred()) content := map[string]interface{}{} @@ -253,6 +270,7 @@ var _ = Describe("API test", func() { } }) It("overrides models", func() { + response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml", Name: "bert", @@ -270,7 +288,7 @@ var _ = Describe("API test", func() { return response["processed"].(bool) }, "360s", "10s").Should(Equal(true)) - dat, err := os.ReadFile(filepath.Join(tmpdir, "bert.yaml")) + dat, err := os.ReadFile(filepath.Join(modelDir, "bert.yaml")) Expect(err).ToNot(HaveOccurred()) content := map[string]interface{}{} @@ -294,7 +312,7 @@ var _ = Describe("API test", func() { return response["processed"].(bool) }, "360s", "10s").Should(Equal(true)) - dat, err := os.ReadFile(filepath.Join(tmpdir, "bert.yaml")) + dat, err := os.ReadFile(filepath.Join(modelDir, "bert.yaml")) Expect(err).ToNot(HaveOccurred()) content := map[string]interface{}{} @@ -483,8 +501,11 @@ var _ = Describe("API test", func() { var err error tmpdir, err = os.MkdirTemp("", "") Expect(err).ToNot(HaveOccurred()) + modelDir = filepath.Join(tmpdir, "models") + backendAssetsDir := filepath.Join(tmpdir, "backend-assets") + err = os.Mkdir(backendAssetsDir, 0755) + Expect(err).ToNot(HaveOccurred()) - modelLoader = model.NewModelLoader(tmpdir) c, cancel = context.WithCancel(context.Background()) galleries := []gallery.Gallery{ @@ -494,21 +515,20 @@ var _ = Describe("API test", func() { }, } - metricsService, err := metrics.SetupMetrics() - Expect(err).ToNot(HaveOccurred()) - - app, err = App( + bcl, ml, applicationConfig, err = startup.Startup( append(commonOpts, - options.WithContext(c), - options.WithMetrics(metricsService), - options.WithAudioDir(tmpdir), - options.WithImageDir(tmpdir), - options.WithGalleries(galleries), - options.WithModelLoader(modelLoader), - options.WithBackendAssets(backendAssets), - options.WithBackendAssetsOutput(tmpdir))..., + config.WithContext(c), + config.WithAudioDir(tmpdir), + config.WithImageDir(tmpdir), + config.WithGalleries(galleries), + config.WithModelPath(modelDir), + config.WithBackendAssets(backendAssets), + 
config.WithBackendAssetsOutput(tmpdir))..., ) Expect(err).ToNot(HaveOccurred()) + app, err = App(bcl, ml, applicationConfig) + Expect(err).ToNot(HaveOccurred()) + go app.Listen("127.0.0.1:9090") defaultConfig := openai.DefaultConfig("") @@ -527,8 +547,14 @@ var _ = Describe("API test", func() { AfterEach(func() { cancel() - app.Shutdown() - os.RemoveAll(tmpdir) + if app != nil { + err := app.Shutdown() + Expect(err).ToNot(HaveOccurred()) + } + err := os.RemoveAll(tmpdir) + Expect(err).ToNot(HaveOccurred()) + _, err = os.ReadDir(tmpdir) + Expect(err).To(HaveOccurred()) }) It("installs and is capable to run tts", Label("tts"), func() { if runtime.GOOS != "linux" { @@ -599,20 +625,20 @@ var _ = Describe("API test", func() { Context("API query", func() { BeforeEach(func() { - modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH")) + modelPath := os.Getenv("MODELS_PATH") c, cancel = context.WithCancel(context.Background()) - metricsService, err := metrics.SetupMetrics() - Expect(err).ToNot(HaveOccurred()) + var err error - app, err = App( + bcl, ml, applicationConfig, err = startup.Startup( append(commonOpts, - options.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")), - options.WithContext(c), - options.WithModelLoader(modelLoader), - options.WithMetrics(metricsService), + config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")), + config.WithContext(c), + config.WithModelPath(modelPath), )...) Expect(err).ToNot(HaveOccurred()) + app, err = App(bcl, ml, applicationConfig) + Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") defaultConfig := openai.DefaultConfig("") @@ -630,7 +656,10 @@ var _ = Describe("API test", func() { }) AfterEach(func() { cancel() - app.Shutdown() + if app != nil { + err := app.Shutdown() + Expect(err).ToNot(HaveOccurred()) + } }) It("returns the models list", func() { models, err := client.ListModels(context.TODO()) @@ -811,20 +840,20 @@ var _ = Describe("API test", func() { Context("Config file", func() { BeforeEach(func() { - modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH")) + modelPath := os.Getenv("MODELS_PATH") c, cancel = context.WithCancel(context.Background()) - metricsService, err := metrics.SetupMetrics() - Expect(err).ToNot(HaveOccurred()) - - app, err = App( + var err error + bcl, ml, applicationConfig, err = startup.Startup( append(commonOpts, - options.WithContext(c), - options.WithMetrics(metricsService), - options.WithModelLoader(modelLoader), - options.WithConfigFile(os.Getenv("CONFIG_FILE")))..., + config.WithContext(c), + config.WithModelPath(modelPath), + config.WithConfigFile(os.Getenv("CONFIG_FILE")))..., ) Expect(err).ToNot(HaveOccurred()) + app, err = App(bcl, ml, applicationConfig) + Expect(err).ToNot(HaveOccurred()) + go app.Listen("127.0.0.1:9090") defaultConfig := openai.DefaultConfig("") @@ -840,7 +869,10 @@ var _ = Describe("API test", func() { }) AfterEach(func() { cancel() - app.Shutdown() + if app != nil { + err := app.Shutdown() + Expect(err).ToNot(HaveOccurred()) + } }) It("can generate chat completions from config file (list1)", func() { resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}}) diff --git a/api/ctx/fiber.go b/core/http/ctx/fiber.go similarity index 100% rename from api/ctx/fiber.go rename to core/http/ctx/fiber.go diff --git a/core/http/endpoints/localai/backend_monitor.go b/core/http/endpoints/localai/backend_monitor.go new file mode 
100644 index 00000000..8c7a664a --- /dev/null +++ b/core/http/endpoints/localai/backend_monitor.go @@ -0,0 +1,36 @@ +package localai + +import ( + "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/core/services" + "github.com/gofiber/fiber/v2" +) + +func BackendMonitorEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + + input := new(schema.BackendMonitorRequest) + // Get input data from the request body + if err := c.BodyParser(input); err != nil { + return err + } + + resp, err := bm.CheckAndSample(input.Model) + if err != nil { + return err + } + return c.JSON(resp) + } +} + +func BackendShutdownEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + input := new(schema.BackendMonitorRequest) + // Get input data from the request body + if err := c.BodyParser(input); err != nil { + return err + } + + return bm.ShutdownModel(input.Model) + } +} diff --git a/core/http/endpoints/localai/gallery.go b/core/http/endpoints/localai/gallery.go new file mode 100644 index 00000000..5c295a2a --- /dev/null +++ b/core/http/endpoints/localai/gallery.go @@ -0,0 +1,146 @@ +package localai + +import ( + "encoding/json" + "fmt" + "slices" + + "github.com/go-skynet/LocalAI/core/services" + "github.com/go-skynet/LocalAI/pkg/gallery" + "github.com/gofiber/fiber/v2" + "github.com/google/uuid" + "github.com/rs/zerolog/log" +) + +type ModelGalleryEndpointService struct { + galleries []gallery.Gallery + modelPath string + galleryApplier *services.GalleryService +} + +type GalleryModel struct { + ID string `json:"id"` + gallery.GalleryModel +} + +func CreateModelGalleryEndpointService(galleries []gallery.Gallery, modelPath string, galleryApplier *services.GalleryService) ModelGalleryEndpointService { + return ModelGalleryEndpointService{ + galleries: galleries, + modelPath: modelPath, + galleryApplier: galleryApplier, + } +} + +func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + status := mgs.galleryApplier.GetStatus(c.Params("uuid")) + if status == nil { + return fmt.Errorf("could not find any status for ID") + } + return c.JSON(status) + } +} + +func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + return c.JSON(mgs.galleryApplier.GetAllStatus()) + } +} + +func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + input := new(GalleryModel) + // Get input data from the request body + if err := c.BodyParser(input); err != nil { + return err + } + + uuid, err := uuid.NewUUID() + if err != nil { + return err + } + mgs.galleryApplier.C <- gallery.GalleryOp{ + Req: input.GalleryModel, + Id: uuid.String(), + GalleryName: input.ID, + Galleries: mgs.galleries, + } + return c.JSON(struct { + ID string `json:"uuid"` + StatusURL string `json:"status"` + }{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()}) + } +} + +func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries) + + models, err := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath) + if err != nil { + return err + } + log.Debug().Msgf("Models found from galleries: %+v", models) + for _, m := range models { + log.Debug().Msgf("Model found from 
galleries: %+v", m) + } + dat, err := json.Marshal(models) + if err != nil { + return err + } + return c.Send(dat) + } +} + +// NOTE: This is different (and much simpler!) than above! This JUST lists the model galleries that have been loaded, not their contents! +func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + log.Debug().Msgf("Listing model galleries %+v", mgs.galleries) + dat, err := json.Marshal(mgs.galleries) + if err != nil { + return err + } + return c.Send(dat) + } +} + +func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + input := new(gallery.Gallery) + // Get input data from the request body + if err := c.BodyParser(input); err != nil { + return err + } + if slices.ContainsFunc(mgs.galleries, func(gallery gallery.Gallery) bool { + return gallery.Name == input.Name + }) { + return fmt.Errorf("%s already exists", input.Name) + } + dat, err := json.Marshal(mgs.galleries) + if err != nil { + return err + } + log.Debug().Msgf("Adding %+v to gallery list", *input) + mgs.galleries = append(mgs.galleries, *input) + return c.Send(dat) + } +} + +func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + input := new(gallery.Gallery) + // Get input data from the request body + if err := c.BodyParser(input); err != nil { + return err + } + if !slices.ContainsFunc(mgs.galleries, func(gallery gallery.Gallery) bool { + return gallery.Name == input.Name + }) { + return fmt.Errorf("%s is not currently registered", input.Name) + } + mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery gallery.Gallery) bool { + return gallery.Name == input.Name + }) + return c.Send(nil) + } +} diff --git a/core/http/endpoints/localai/metrics.go b/core/http/endpoints/localai/metrics.go new file mode 100644 index 00000000..23c2af7a --- /dev/null +++ b/core/http/endpoints/localai/metrics.go @@ -0,0 +1,43 @@ +package localai + +import ( + "time" + + "github.com/go-skynet/LocalAI/core/services" + "github.com/gofiber/fiber/v2" + "github.com/gofiber/fiber/v2/middleware/adaptor" + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +func LocalAIMetricsEndpoint() fiber.Handler { + + return adaptor.HTTPHandler(promhttp.Handler()) +} + +type apiMiddlewareConfig struct { + Filter func(c *fiber.Ctx) bool + metricsService *services.LocalAIMetricsService +} + +func LocalAIMetricsAPIMiddleware(metrics *services.LocalAIMetricsService) fiber.Handler { + cfg := apiMiddlewareConfig{ + metricsService: metrics, + Filter: func(c *fiber.Ctx) bool { + return c.Path() == "/metrics" + }, + } + + return func(c *fiber.Ctx) error { + if cfg.Filter != nil && cfg.Filter(c) { + return c.Next() + } + path := c.Path() + method := c.Method() + + start := time.Now() + err := c.Next() + elapsed := float64(time.Since(start)) / float64(time.Second) + cfg.metricsService.ObserveAPICall(method, path, elapsed) + return err + } +} diff --git a/api/localai/localai.go b/core/http/endpoints/localai/tts.go similarity index 56% rename from api/localai/localai.go rename to core/http/endpoints/localai/tts.go index 9d5bbf6c..84fb7a55 100644 --- a/api/localai/localai.go +++ b/core/http/endpoints/localai/tts.go @@ -1,37 +1,32 @@ package localai import ( - fiberContext "github.com/go-skynet/LocalAI/api/ctx" "github.com/go-skynet/LocalAI/core/backend" - config "github.com/go-skynet/LocalAI/core/config" - 
"github.com/rs/zerolog/log" + "github.com/go-skynet/LocalAI/core/config" + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/pkg/model" - "github.com/go-skynet/LocalAI/core/options" + "github.com/go-skynet/LocalAI/core/schema" "github.com/gofiber/fiber/v2" + "github.com/rs/zerolog/log" ) -type TTSRequest struct { - Model string `json:"model" yaml:"model"` - Input string `json:"input" yaml:"input"` - Backend string `json:"backend" yaml:"backend"` -} - -func TTSEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { +func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - input := new(TTSRequest) + input := new(schema.TTSRequest) // Get input data from the request body if err := c.BodyParser(input); err != nil { return err } - modelFile, err := fiberContext.ModelFromContext(c, o.Loader, input.Model, false) + modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false) if err != nil { modelFile = input.Model log.Warn().Msgf("Model not found in context: %s", input.Model) } - cfg, err := config.Load(modelFile, o.Loader.ModelPath, cm, false, 0, 0, false) + cfg, err := config.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, cl, false, 0, 0, false) if err != nil { modelFile = input.Model log.Warn().Msgf("Model not found in context: %s", input.Model) @@ -44,7 +39,7 @@ func TTSEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) cfg.Backend = input.Backend } - filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, o.Loader, o, *cfg) + filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, ml, appConfig, *cfg) if err != nil { return err } diff --git a/api/openai/chat.go b/core/http/endpoints/openai/chat.go similarity index 90% rename from api/openai/chat.go rename to core/http/endpoints/openai/chat.go index cd535f0a..3add0972 100644 --- a/api/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -9,8 +9,7 @@ import ( "time" "github.com/go-skynet/LocalAI/core/backend" - config "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/options" + "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/grammar" model "github.com/go-skynet/LocalAI/pkg/model" @@ -21,12 +20,12 @@ import ( "github.com/valyala/fasthttp" ) -func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { +func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error { emptyMessage := "" id := uuid.New().String() created := int(time.Now().Unix()) - process := func(s string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { + process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { initialMessage := schema.OpenAIResponse{ ID: id, Created: created, @@ -36,7 +35,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) } responses <- initialMessage - ComputeChoices(req, s, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { + ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage 
backend.TokenUsage) bool { resp := schema.OpenAIResponse{ ID: id, Created: created, @@ -55,9 +54,9 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) }) close(responses) } - processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { + processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { result := "" - _, tokenUsage, _ := ComputeChoices(req, prompt, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { + _, tokenUsage, _ := ComputeChoices(req, prompt, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { result += s // TODO: Change generated BNF grammar to be compliant with the schema so we can // stream the result token by token here. @@ -78,7 +77,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) } responses <- initialMessage - result, err := handleQuestion(config, req, o, results[0].arguments, prompt) + result, err := handleQuestion(config, req, ml, startupOptions, results[0].arguments, prompt) if err != nil { log.Error().Msgf("error handling question: %s", err.Error()) return @@ -154,12 +153,12 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) return func(c *fiber.Ctx) error { processFunctions := false funcs := grammar.Functions{} - modelFile, input, err := readRequest(c, o, true) + modelFile, input, err := readRequest(c, ml, startupOptions, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16) + config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, startupOptions.Debug, startupOptions.Threads, startupOptions.ContextSize, startupOptions.F16) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } @@ -252,7 +251,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) FunctionName: i.Name, MessageIndex: messageIndex, } - templatedChatMessage, err := o.Loader.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData) + templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData) if err != nil { log.Error().Msgf("error processing message %+v using template \"%s\": %v. 
Skipping!", chatMessageData, config.TemplateConfig.ChatMessage, err) } else { @@ -320,7 +319,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) templateFile := "" // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if o.Loader.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { + if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { templateFile = config.Model } @@ -333,7 +332,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) } if templateFile != "" { - templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{ + templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{ SystemPrompt: config.SystemPrompt, SuppressSystemPrompt: suppressConfigSystemPrompt, Input: predInput, @@ -357,9 +356,9 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) responses := make(chan schema.OpenAIResponse) if !processFunctions { - go process(predInput, input, config, o.Loader, responses) + go process(predInput, input, config, ml, responses) } else { - go processTools(noActionName, predInput, input, config, o.Loader, responses) + go processTools(noActionName, predInput, input, config, ml, responses) } c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { @@ -413,7 +412,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) // no streaming mode default: - result, tokenUsage, err := ComputeChoices(input, predInput, config, o, o.Loader, func(s string, c *[]schema.Choice) { + result, tokenUsage, err := ComputeChoices(input, predInput, config, startupOptions, ml, func(s string, c *[]schema.Choice) { if !processFunctions { // no function is called, just reply and use stop as finish reason *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}}) @@ -425,7 +424,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) switch { case noActionsToRun: - result, err := handleQuestion(config, input, o, results[0].arguments, predInput) + result, err := handleQuestion(config, input, ml, startupOptions, results[0].arguments, predInput) if err != nil { log.Error().Msgf("error handling question: %s", err.Error()) return @@ -506,7 +505,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) } } -func handleQuestion(config *config.Config, input *schema.OpenAIRequest, o *options.Option, args, prompt string) (string, error) { +func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, args, prompt string) (string, error) { log.Debug().Msgf("nothing to do, computing a reply") // If there is a message that the LLM already sends as part of the JSON reply, use it @@ -535,7 +534,7 @@ func handleQuestion(config *config.Config, input *schema.OpenAIRequest, o *optio images = append(images, m.StringImages...) 
} - predFunc, err := backend.ModelInference(input.Context, prompt, images, o.Loader, *config, o, nil) + predFunc, err := backend.ModelInference(input.Context, prompt, images, ml, *config, o, nil) if err != nil { log.Error().Msgf("inference error: %s", err.Error()) return "", err diff --git a/api/openai/completion.go b/core/http/endpoints/openai/completion.go similarity index 82% rename from api/openai/completion.go rename to core/http/endpoints/openai/completion.go index af56625e..9344f9fe 100644 --- a/api/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -9,8 +9,8 @@ import ( "time" "github.com/go-skynet/LocalAI/core/backend" - config "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/options" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/grammar" model "github.com/go-skynet/LocalAI/pkg/model" @@ -21,12 +21,12 @@ import ( ) // https://platform.openai.com/docs/api-reference/completions -func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { +func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { id := uuid.New().String() created := int(time.Now().Unix()) - process := func(s string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { - ComputeChoices(req, s, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { + process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { + ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { resp := schema.OpenAIResponse{ ID: id, Created: created, @@ -53,14 +53,14 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe } return func(c *fiber.Ctx) error { - modelFile, input, err := readRequest(c, o, true) + modelFile, input, err := readRequest(c, ml, appConfig, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } log.Debug().Msgf("`input`: %+v", input) - config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16) + config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } @@ -84,7 +84,7 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe templateFile := "" // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if o.Loader.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { + if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { templateFile = config.Model } @@ -100,7 +100,7 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe predInput := config.PromptStrings[0] if templateFile != "" { - templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ + templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ Input: predInput, }) if err == nil { @@ -111,7 +111,7 @@ func 
CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe responses := make(chan schema.OpenAIResponse) - go process(predInput, input, config, o.Loader, responses) + go process(predInput, input, config, ml, responses) c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { @@ -153,7 +153,7 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe for k, i := range config.PromptStrings { if templateFile != "" { // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ + templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ SystemPrompt: config.SystemPrompt, Input: i, }) @@ -164,7 +164,7 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe } r, tokenUsage, err := ComputeChoices( - input, i, config, o, o.Loader, func(s string, c *[]schema.Choice) { + input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) { *c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k}) }, nil) if err != nil { diff --git a/api/openai/edit.go b/core/http/endpoints/openai/edit.go similarity index 77% rename from api/openai/edit.go rename to core/http/endpoints/openai/edit.go index 56b17920..25497095 100644 --- a/api/openai/edit.go +++ b/core/http/endpoints/openai/edit.go @@ -6,8 +6,8 @@ import ( "time" "github.com/go-skynet/LocalAI/core/backend" - config "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/options" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" @@ -16,14 +16,14 @@ import ( "github.com/rs/zerolog/log" ) -func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { +func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - modelFile, input, err := readRequest(c, o, true) + modelFile, input, err := readRequest(c, ml, appConfig, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16) + config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } @@ -33,7 +33,7 @@ func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) templateFile := "" // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if o.Loader.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { + if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { templateFile = config.Model } @@ -46,7 +46,7 @@ func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) for _, i := range config.InputStrings { if templateFile != "" { - templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{ + templatedInput, err := ml.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{ Input: i, Instruction: input.Instruction, 
SystemPrompt: config.SystemPrompt, @@ -57,7 +57,7 @@ func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) } } - r, tokenUsage, err := ComputeChoices(input, i, config, o, o.Loader, func(s string, c *[]schema.Choice) { + r, tokenUsage, err := ComputeChoices(input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) { *c = append(*c, schema.Choice{Text: s}) }, nil) if err != nil { diff --git a/api/openai/embeddings.go b/core/http/endpoints/openai/embeddings.go similarity index 73% rename from api/openai/embeddings.go rename to core/http/endpoints/openai/embeddings.go index 198493e1..774b0a5e 100644 --- a/api/openai/embeddings.go +++ b/core/http/endpoints/openai/embeddings.go @@ -6,24 +6,25 @@ import ( "time" "github.com/go-skynet/LocalAI/core/backend" - config "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/core/schema" "github.com/google/uuid" - "github.com/go-skynet/LocalAI/core/options" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" ) // https://platform.openai.com/docs/api-reference/embeddings -func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { +func EmbeddingsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - model, input, err := readRequest(c, o, true) + model, input, err := readRequest(c, ml, appConfig, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - config, input, err := mergeRequestWithConfig(model, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16) + config, input, err := mergeRequestWithConfig(model, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } @@ -33,7 +34,7 @@ func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe for i, s := range config.InputToken { // get the model function to call for the result - embedFn, err := backend.ModelEmbedding("", s, o.Loader, *config, o) + embedFn, err := backend.ModelEmbedding("", s, ml, *config, appConfig) if err != nil { return err } @@ -47,7 +48,7 @@ func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe for i, s := range config.InputStrings { // get the model function to call for the result - embedFn, err := backend.ModelEmbedding(s, []int{}, o.Loader, *config, o) + embedFn, err := backend.ModelEmbedding(s, []int{}, ml, *config, appConfig) if err != nil { return err } diff --git a/api/openai/files.go b/core/http/endpoints/openai/files.go similarity index 83% rename from api/openai/files.go rename to core/http/endpoints/openai/files.go index 140b4151..5cb8d7a9 100644 --- a/api/openai/files.go +++ b/core/http/endpoints/openai/files.go @@ -8,8 +8,8 @@ import ( "path/filepath" "time" - config "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/options" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/pkg/utils" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" @@ -62,7 +62,7 @@ func LoadUploadConfig(uploadPath string) { } // UploadFilesEndpoint https://platform.openai.com/docs/api-reference/files/create -func UploadFilesEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { +func UploadFilesEndpoint(cm 
*config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { file, err := c.FormFile("file") if err != nil { @@ -70,8 +70,8 @@ func UploadFilesEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fib } // Check the file size - if file.Size > int64(o.UploadLimitMB*1024*1024) { - return c.Status(fiber.StatusBadRequest).SendString(fmt.Sprintf("File size %d exceeds upload limit %d", file.Size, o.UploadLimitMB)) + if file.Size > int64(appConfig.UploadLimitMB*1024*1024) { + return c.Status(fiber.StatusBadRequest).SendString(fmt.Sprintf("File size %d exceeds upload limit %d", file.Size, appConfig.UploadLimitMB)) } purpose := c.FormValue("purpose", "") //TODO put in purpose dirs @@ -82,7 +82,7 @@ func UploadFilesEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fib // Sanitize the filename to prevent directory traversal filename := utils.SanitizeFileName(file.Filename) - savePath := filepath.Join(o.UploadDir, filename) + savePath := filepath.Join(appConfig.UploadDir, filename) // Check if file already exists if _, err := os.Stat(savePath); !os.IsNotExist(err) { @@ -104,13 +104,13 @@ func UploadFilesEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fib } uploadedFiles = append(uploadedFiles, f) - saveUploadConfig(o.UploadDir) + saveUploadConfig(appConfig.UploadDir) return c.Status(fiber.StatusOK).JSON(f) } } // ListFilesEndpoint https://platform.openai.com/docs/api-reference/files/list -func ListFilesEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { +func ListFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { type ListFiles struct { Data []File Object string @@ -150,7 +150,7 @@ func getFileFromRequest(c *fiber.Ctx) (*File, error) { } // GetFilesEndpoint https://platform.openai.com/docs/api-reference/files/retrieve -func GetFilesEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { +func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { file, err := getFileFromRequest(c) if err != nil { @@ -162,7 +162,7 @@ func GetFilesEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber. 
} // DeleteFilesEndpoint https://platform.openai.com/docs/api-reference/files/delete -func DeleteFilesEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { +func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { type DeleteStatus struct { Id string Object string @@ -175,7 +175,7 @@ func DeleteFilesEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fib return c.Status(fiber.StatusInternalServerError).SendString(err.Error()) } - err = os.Remove(filepath.Join(o.UploadDir, file.Filename)) + err = os.Remove(filepath.Join(appConfig.UploadDir, file.Filename)) if err != nil { // If the file doesn't exist then we should just continue to remove it if !errors.Is(err, os.ErrNotExist) { @@ -191,7 +191,7 @@ func DeleteFilesEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fib } } - saveUploadConfig(o.UploadDir) + saveUploadConfig(appConfig.UploadDir) return c.JSON(DeleteStatus{ Id: file.ID, Object: "file", @@ -201,14 +201,14 @@ func DeleteFilesEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fib } // GetFilesContentsEndpoint https://platform.openai.com/docs/api-reference/files/retrieve-contents -func GetFilesContentsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { +func GetFilesContentsEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { file, err := getFileFromRequest(c) if err != nil { return c.Status(fiber.StatusInternalServerError).SendString(err.Error()) } - fileContents, err := os.ReadFile(filepath.Join(o.UploadDir, file.Filename)) + fileContents, err := os.ReadFile(filepath.Join(appConfig.UploadDir, file.Filename)) if err != nil { return c.Status(fiber.StatusInternalServerError).SendString(err.Error()) } diff --git a/api/openai/files_test.go b/core/http/endpoints/openai/files_test.go similarity index 92% rename from api/openai/files_test.go rename to core/http/endpoints/openai/files_test.go index 535cde8b..a036bd0d 100644 --- a/api/openai/files_test.go +++ b/core/http/endpoints/openai/files_test.go @@ -11,8 +11,8 @@ import ( "path/filepath" "strings" - config "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/options" + "github.com/go-skynet/LocalAI/core/config" + utils2 "github.com/go-skynet/LocalAI/pkg/utils" "github.com/gofiber/fiber/v2" "github.com/stretchr/testify/assert" @@ -25,11 +25,11 @@ type ListFiles struct { Object string } -func startUpApp() (app *fiber.App, option *options.Option, loader *config.ConfigLoader) { +func startUpApp() (app *fiber.App, option *config.ApplicationConfig, loader *config.BackendConfigLoader) { // Preparing the mocked objects - loader = &config.ConfigLoader{} + loader = &config.BackendConfigLoader{} - option = &options.Option{ + option = &config.ApplicationConfig{ UploadLimitMB: 10, UploadDir: "test_dir", } @@ -52,9 +52,9 @@ func startUpApp() (app *fiber.App, option *options.Option, loader *config.Config func TestUploadFileExceedSizeLimit(t *testing.T) { // Preparing the mocked objects - loader := &config.ConfigLoader{} + loader := &config.BackendConfigLoader{} - option := &options.Option{ + option := &config.ApplicationConfig{ UploadLimitMB: 10, UploadDir: "test_dir", } @@ -174,9 +174,9 @@ func CallFilesContentEndpoint(t *testing.T, app *fiber.App, fileId string) (*htt return app.Test(request) } -func CallFilesUploadEndpoint(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize 
int, o *options.Option) (*http.Response, error) { +func CallFilesUploadEndpoint(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) (*http.Response, error) { // Create a file that exceeds the limit - file := createTestFile(t, fileName, fileSize, o) + file := createTestFile(t, fileName, fileSize, appConfig) // Creating a new HTTP Request body, writer := newMultipartFile(file.Name(), tag, purpose) @@ -186,9 +186,9 @@ func CallFilesUploadEndpoint(t *testing.T, app *fiber.App, fileName, tag, purpos return app.Test(req) } -func CallFilesUploadEndpointWithCleanup(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, o *options.Option) File { +func CallFilesUploadEndpointWithCleanup(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) File { // Create a file that exceeds the limit - file := createTestFile(t, fileName, fileSize, o) + file := createTestFile(t, fileName, fileSize, appConfig) // Creating a new HTTP Request body, writer := newMultipartFile(file.Name(), tag, purpose) @@ -233,7 +233,7 @@ func newMultipartFile(filePath, tag, purpose string) (*strings.Reader, *multipar } // Helper to create test files -func createTestFile(t *testing.T, name string, sizeMB int, option *options.Option) *os.File { +func createTestFile(t *testing.T, name string, sizeMB int, option *config.ApplicationConfig) *os.File { err := os.MkdirAll(option.UploadDir, 0755) if err != nil { diff --git a/api/openai/image.go b/core/http/endpoints/openai/image.go similarity index 87% rename from api/openai/image.go rename to core/http/endpoints/openai/image.go index 2da6883e..8f535801 100644 --- a/api/openai/image.go +++ b/core/http/endpoints/openai/image.go @@ -13,12 +13,12 @@ import ( "strings" "time" + "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" "github.com/google/uuid" "github.com/go-skynet/LocalAI/core/backend" - config "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/options" + model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" @@ -59,9 +59,9 @@ func downloadFile(url string) (string, error) { * */ -func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { +func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - m, input, err := readRequest(c, o, false) + m, input, err := readRequest(c, ml, appConfig, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } @@ -71,7 +71,7 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx } log.Debug().Msgf("Loading model: %+v", m) - config, input, err := mergeRequestWithConfig(m, input, cm, o.Loader, o.Debug, 0, 0, false) + config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, 0, 0, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } @@ -104,7 +104,7 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx } // Create a temporary file - outputFile, err := os.CreateTemp(o.ImageDir, "b64") + outputFile, err := os.CreateTemp(appConfig.ImageDir, "b64") if err != nil { return err } @@ -133,15 +133,15 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx sizeParts := strings.Split(input.Size, 
"x") if len(sizeParts) != 2 { - return fmt.Errorf("Invalid value for 'size'") + return fmt.Errorf("invalid value for 'size'") } width, err := strconv.Atoi(sizeParts[0]) if err != nil { - return fmt.Errorf("Invalid value for 'size'") + return fmt.Errorf("invalid value for 'size'") } height, err := strconv.Atoi(sizeParts[1]) if err != nil { - return fmt.Errorf("Invalid value for 'size'") + return fmt.Errorf("invalid value for 'size'") } b64JSON := false @@ -179,7 +179,7 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx tempDir := "" if !b64JSON { - tempDir = o.ImageDir + tempDir = appConfig.ImageDir } // Create a temporary file outputFile, err := os.CreateTemp(tempDir, "b64") @@ -196,7 +196,7 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx baseURL := c.BaseURL() - fn, err := backend.ImageGeneration(height, width, mode, step, input.Seed, positive_prompt, negative_prompt, src, output, o.Loader, *config, o) + fn, err := backend.ImageGeneration(height, width, mode, step, input.Seed, positive_prompt, negative_prompt, src, output, ml, *config, appConfig) if err != nil { return err } diff --git a/api/openai/inference.go b/core/http/endpoints/openai/inference.go similarity index 90% rename from api/openai/inference.go rename to core/http/endpoints/openai/inference.go index 184688b2..5d97d21d 100644 --- a/api/openai/inference.go +++ b/core/http/endpoints/openai/inference.go @@ -2,8 +2,8 @@ package openai import ( "github.com/go-skynet/LocalAI/core/backend" - config "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/options" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" model "github.com/go-skynet/LocalAI/pkg/model" ) @@ -11,8 +11,8 @@ import ( func ComputeChoices( req *schema.OpenAIRequest, predInput string, - config *config.Config, - o *options.Option, + config *config.BackendConfig, + o *config.ApplicationConfig, loader *model.ModelLoader, cb func(string, *[]schema.Choice), tokenCallback func(string, backend.TokenUsage) bool) ([]schema.Choice, backend.TokenUsage, error) { diff --git a/api/openai/list.go b/core/http/endpoints/openai/list.go similarity index 87% rename from api/openai/list.go rename to core/http/endpoints/openai/list.go index 614d5c80..04e611a2 100644 --- a/api/openai/list.go +++ b/core/http/endpoints/openai/list.go @@ -3,15 +3,15 @@ package openai import ( "regexp" - config "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" ) -func ListModelsEndpoint(loader *model.ModelLoader, cm *config.ConfigLoader) func(ctx *fiber.Ctx) error { +func ListModelsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error { return func(c *fiber.Ctx) error { - models, err := loader.ListModels() + models, err := ml.ListModels() if err != nil { return err } @@ -40,7 +40,7 @@ func ListModelsEndpoint(loader *model.ModelLoader, cm *config.ConfigLoader) func excludeConfigured := c.QueryBool("excludeConfigured", true) // Start with the known configurations - for _, c := range cm.GetAllConfigs() { + for _, c := range cl.GetAllBackendConfigs() { if excludeConfigured { mm[c.Model] = nil } diff --git a/api/openai/request.go b/core/http/endpoints/openai/request.go similarity index 89% rename from api/openai/request.go rename to core/http/endpoints/openai/request.go index 
83c41d97..46ff2438 100644 --- a/api/openai/request.go +++ b/core/http/endpoints/openai/request.go @@ -5,13 +5,12 @@ import ( "encoding/base64" "encoding/json" "fmt" - "io/ioutil" + "io" "net/http" "strings" - fiberContext "github.com/go-skynet/LocalAI/api/ctx" - config "github.com/go-skynet/LocalAI/core/config" - options "github.com/go-skynet/LocalAI/core/options" + "github.com/go-skynet/LocalAI/core/config" + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/grammar" model "github.com/go-skynet/LocalAI/pkg/model" @@ -19,11 +18,9 @@ import ( "github.com/rs/zerolog/log" ) -func readRequest(c *fiber.Ctx, o *options.Option, firstModel bool) (string, *schema.OpenAIRequest, error) { +func readRequest(c *fiber.Ctx, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) { input := new(schema.OpenAIRequest) - ctx, cancel := context.WithCancel(o.Context) - input.Context = ctx - input.Cancel = cancel + // Get input data from the request body if err := c.BodyParser(input); err != nil { return "", nil, fmt.Errorf("failed parsing request body: %w", err) @@ -31,9 +28,13 @@ func readRequest(c *fiber.Ctx, o *options.Option, firstModel bool) (string, *sch received, _ := json.Marshal(input) + ctx, cancel := context.WithCancel(o.Context) + input.Context = ctx + input.Cancel = cancel + log.Debug().Msgf("Request received: %s", string(received)) - modelFile, err := fiberContext.ModelFromContext(c, o.Loader, input.Model, firstModel) + modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, firstModel) return modelFile, input, err } @@ -50,7 +51,7 @@ func getBase64Image(s string) (string, error) { defer resp.Body.Close() // read the image data into memory - data, err := ioutil.ReadAll(resp.Body) + data, err := io.ReadAll(resp.Body) if err != nil { return "", err } @@ -69,7 +70,7 @@ func getBase64Image(s string) (string, error) { return "", fmt.Errorf("not valid string") } -func updateRequestConfig(config *config.Config, input *schema.OpenAIRequest) { +func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest) { if input.Echo { config.Echo = input.Echo } @@ -270,8 +271,8 @@ func updateRequestConfig(config *config.Config, input *schema.OpenAIRequest) { } } -func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.ConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.Config, *schema.OpenAIRequest, error) { - cfg, err := config.Load(modelFile, loader.ModelPath, cm, debug, threads, ctx, f16) +func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) { + cfg, err := config.LoadBackendConfigFileByName(modelFile, loader.ModelPath, cm, debug, threads, ctx, f16) // Set the parameters for the language model prediction updateRequestConfig(cfg, input) diff --git a/api/openai/transcription.go b/core/http/endpoints/openai/transcription.go similarity index 71% rename from api/openai/transcription.go rename to core/http/endpoints/openai/transcription.go index c3fd7d5c..403f8b02 100644 --- a/api/openai/transcription.go +++ b/core/http/endpoints/openai/transcription.go @@ -9,22 +9,22 @@ import ( "path/filepath" "github.com/go-skynet/LocalAI/core/backend" - config "github.com/go-skynet/LocalAI/core/config" - 
"github.com/go-skynet/LocalAI/core/options" + "github.com/go-skynet/LocalAI/core/config" + model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" ) // https://platform.openai.com/docs/api-reference/audio/create -func TranscriptEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { +func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - m, input, err := readRequest(c, o, false) + m, input, err := readRequest(c, ml, appConfig, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - config, input, err := mergeRequestWithConfig(m, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16) + config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } @@ -59,7 +59,7 @@ func TranscriptEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe log.Debug().Msgf("Audio file copied to: %+v", dst) - tr, err := backend.ModelTranscription(dst, input.Language, o.Loader, *config, o) + tr, err := backend.ModelTranscription(dst, input.Language, ml, *config, appConfig) if err != nil { return err } diff --git a/core/schema/localai.go b/core/schema/localai.go new file mode 100644 index 00000000..115183a3 --- /dev/null +++ b/core/schema/localai.go @@ -0,0 +1,21 @@ +package schema + +import ( + gopsutil "github.com/shirou/gopsutil/v3/process" +) + +type BackendMonitorRequest struct { + Model string `json:"model" yaml:"model"` +} + +type BackendMonitorResponse struct { + MemoryInfo *gopsutil.MemoryInfoStat + MemoryPercent float32 + CPUPercent float64 +} + +type TTSRequest struct { + Model string `json:"model" yaml:"model"` + Input string `json:"input" yaml:"input"` + Backend string `json:"backend" yaml:"backend"` +} diff --git a/core/schema/openai.go b/core/schema/openai.go index 53dd5324..1c13847c 100644 --- a/core/schema/openai.go +++ b/core/schema/openai.go @@ -3,8 +3,6 @@ package schema import ( "context" - config "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/pkg/grammar" ) @@ -108,10 +106,10 @@ type ChatCompletionResponseFormat struct { } type OpenAIRequest struct { - config.PredictionOptions + PredictionOptions - Context context.Context - Cancel context.CancelFunc + Context context.Context `json:"-"` + Cancel context.CancelFunc `json:"-"` // whisper File string `json:"file" validate:"required"` diff --git a/core/config/prediction.go b/core/schema/prediction.go similarity index 99% rename from core/config/prediction.go rename to core/schema/prediction.go index dccb4dfb..efd085a4 100644 --- a/core/config/prediction.go +++ b/core/schema/prediction.go @@ -1,4 +1,4 @@ -package config +package schema type PredictionOptions struct { diff --git a/core/services/backend_monitor.go b/core/services/backend_monitor.go new file mode 100644 index 00000000..88176753 --- /dev/null +++ b/core/services/backend_monitor.go @@ -0,0 +1,140 @@ +package services + +import ( + "context" + "fmt" + "strings" + + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/grpc/proto" + "github.com/go-skynet/LocalAI/pkg/model" + + "github.com/rs/zerolog/log" + + gopsutil "github.com/shirou/gopsutil/v3/process" +) + +type BackendMonitor 
struct {
+	configLoader *config.BackendConfigLoader
+	modelLoader  *model.ModelLoader
+	options      *config.ApplicationConfig // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name.
+}
+
+func NewBackendMonitor(configLoader *config.BackendConfigLoader, modelLoader *model.ModelLoader, appConfig *config.ApplicationConfig) BackendMonitor {
+	return BackendMonitor{
+		configLoader: configLoader,
+		modelLoader:  modelLoader,
+		options:      appConfig,
+	}
+}
+
+func (bm BackendMonitor) getModelLoaderIDFromModelName(modelName string) (string, error) {
+	config, exists := bm.configLoader.GetBackendConfig(modelName)
+	var backendId string
+	if exists {
+		backendId = config.Model
+	} else {
+		// Last ditch effort: use it raw, see if a backend happens to match.
+		backendId = modelName
+	}
+
+	if !strings.HasSuffix(backendId, ".bin") {
+		backendId = fmt.Sprintf("%s.bin", backendId)
+	}
+
+	return backendId, nil
+}
+
+func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) {
+	// Resolve the model name through the shared helper above instead of duplicating its logic here.
+	backend, err := bm.getModelLoaderIDFromModelName(model)
+	if err != nil {
+		return nil, err
+	}
+
+	pid, err := bm.modelLoader.GetGRPCPID(backend)
+
+	if err != nil {
+		log.Error().Msgf("model %s : failed to find pid %+v", model, err)
+		return nil, err
+	}
+
+	// Name is slightly frightening but this does _not_ create a new process, rather it looks up an existing process by PID.
+	backendProcess, err := gopsutil.NewProcess(int32(pid))
+
+	if err != nil {
+		log.Error().Msgf("model %s [PID %d] : error getting process info %+v", model, pid, err)
+		return nil, err
+	}
+
+	memInfo, err := backendProcess.MemoryInfo()
+
+	if err != nil {
+		log.Error().Msgf("model %s [PID %d] : error getting memory info %+v", model, pid, err)
+		return nil, err
+	}
+
+	memPercent, err := backendProcess.MemoryPercent()
+	if err != nil {
+		log.Error().Msgf("model %s [PID %d] : error getting memory percent %+v", model, pid, err)
+		return nil, err
+	}
+
+	cpuPercent, err := backendProcess.CPUPercent()
+	if err != nil {
+		log.Error().Msgf("model %s [PID %d] : error getting cpu percent %+v", model, pid, err)
+		return nil, err
+	}
+
+	return &schema.BackendMonitorResponse{
+		MemoryInfo:    memInfo,
+		MemoryPercent: memPercent,
+		CPUPercent:    cpuPercent,
+	}, nil
+}
+
+func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse, error) {
+	backendId, err := bm.getModelLoaderIDFromModelName(modelName)
+	if err != nil {
+		return nil, err
+	}
+	modelAddr := bm.modelLoader.CheckIsLoaded(backendId)
+	if modelAddr == "" {
+		return nil, fmt.Errorf("backend %s is not currently loaded", backendId)
+	}
+
+	status, rpcErr := modelAddr.GRPC(false, nil).Status(context.TODO())
+	if rpcErr != nil {
+		log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error())
+		val, slbErr := bm.SampleLocalBackendProcess(backendId)
+		if slbErr != nil {
+			return nil, fmt.Errorf("backend %s experienced an error retrieving status info via rpc: %s, then failed local node process sample: %s", backendId, rpcErr.Error(), slbErr.Error())
+		}
+		return &proto.StatusResponse{
+			State: proto.StatusResponse_ERROR,
+			Memory: &proto.MemoryUsageData{
+				Total: val.MemoryInfo.VMS,
+				Breakdown: map[string]uint64{
+					"gopsutil-RSS":
val.MemoryInfo.RSS, + }, + }, + }, nil + } + return status, nil +} + +func (bm BackendMonitor) ShutdownModel(modelName string) error { + backendId, err := bm.getModelLoaderIDFromModelName(modelName) + if err != nil { + return err + } + return bm.modelLoader.ShutdownModel(backendId) +} diff --git a/core/services/gallery.go b/core/services/gallery.go new file mode 100644 index 00000000..826f4573 --- /dev/null +++ b/core/services/gallery.go @@ -0,0 +1,167 @@ +package services + +import ( + "context" + "encoding/json" + "os" + "strings" + "sync" + + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/pkg/gallery" + "github.com/go-skynet/LocalAI/pkg/utils" + "gopkg.in/yaml.v2" +) + +type GalleryService struct { + modelPath string + sync.Mutex + C chan gallery.GalleryOp + statuses map[string]*gallery.GalleryOpStatus +} + +func NewGalleryService(modelPath string) *GalleryService { + return &GalleryService{ + modelPath: modelPath, + C: make(chan gallery.GalleryOp), + statuses: make(map[string]*gallery.GalleryOpStatus), + } +} + +func prepareModel(modelPath string, req gallery.GalleryModel, cl *config.BackendConfigLoader, downloadStatus func(string, string, string, float64)) error { + + config, err := gallery.GetGalleryConfigFromURL(req.URL) + if err != nil { + return err + } + + config.Files = append(config.Files, req.AdditionalFiles...) + + return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus) +} + +func (g *GalleryService) UpdateStatus(s string, op *gallery.GalleryOpStatus) { + g.Lock() + defer g.Unlock() + g.statuses[s] = op +} + +func (g *GalleryService) GetStatus(s string) *gallery.GalleryOpStatus { + g.Lock() + defer g.Unlock() + + return g.statuses[s] +} + +func (g *GalleryService) GetAllStatus() map[string]*gallery.GalleryOpStatus { + g.Lock() + defer g.Unlock() + + return g.statuses +} + +func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader) { + go func() { + for { + select { + case <-c.Done(): + return + case op := <-g.C: + utils.ResetDownloadTimers() + + g.UpdateStatus(op.Id, &gallery.GalleryOpStatus{Message: "processing", Progress: 0}) + + // updates the status with an error + updateError := func(e error) { + g.UpdateStatus(op.Id, &gallery.GalleryOpStatus{Error: e, Processed: true, Message: "error: " + e.Error()}) + } + + // displayDownload displays the download progress + progressCallback := func(fileName string, current string, total string, percentage float64) { + g.UpdateStatus(op.Id, &gallery.GalleryOpStatus{Message: "processing", FileName: fileName, Progress: percentage, TotalFileSize: total, DownloadedFileSize: current}) + utils.DisplayDownloadFunction(fileName, current, total, percentage) + } + + var err error + // if the request contains a gallery name, we apply the gallery from the gallery list + if op.GalleryName != "" { + if strings.Contains(op.GalleryName, "@") { + err = gallery.InstallModelFromGallery(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback) + } else { + err = gallery.InstallModelFromGalleryByName(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback) + } + } else { + err = prepareModel(g.modelPath, op.Req, cl, progressCallback) + } + + if err != nil { + updateError(err) + continue + } + + // Reload models + err = cl.LoadBackendConfigsFromPath(g.modelPath) + if err != nil { + updateError(err) + continue + } + + err = cl.Preload(g.modelPath) + if err != nil { + updateError(err) + continue + } + + g.UpdateStatus(op.Id, 
&gallery.GalleryOpStatus{Processed: true, Message: "completed", Progress: 100}) + } + } + }() +} + +type galleryModel struct { + gallery.GalleryModel `yaml:",inline"` // https://github.com/go-yaml/yaml/issues/63 + ID string `json:"id"` +} + +func processRequests(modelPath, s string, cm *config.BackendConfigLoader, galleries []gallery.Gallery, requests []galleryModel) error { + var err error + for _, r := range requests { + utils.ResetDownloadTimers() + if r.ID == "" { + err = prepareModel(modelPath, r.GalleryModel, cm, utils.DisplayDownloadFunction) + } else { + if strings.Contains(r.ID, "@") { + err = gallery.InstallModelFromGallery( + galleries, r.ID, modelPath, r.GalleryModel, utils.DisplayDownloadFunction) + } else { + err = gallery.InstallModelFromGalleryByName( + galleries, r.ID, modelPath, r.GalleryModel, utils.DisplayDownloadFunction) + } + } + } + return err +} + +func ApplyGalleryFromFile(modelPath, s string, cl *config.BackendConfigLoader, galleries []gallery.Gallery) error { + dat, err := os.ReadFile(s) + if err != nil { + return err + } + var requests []galleryModel + + if err := yaml.Unmarshal(dat, &requests); err != nil { + return err + } + + return processRequests(modelPath, s, cl, galleries, requests) +} + +func ApplyGalleryFromString(modelPath, s string, cl *config.BackendConfigLoader, galleries []gallery.Gallery) error { + var requests []galleryModel + err := json.Unmarshal([]byte(s), &requests) + if err != nil { + return err + } + + return processRequests(modelPath, s, cl, galleries, requests) +} diff --git a/core/services/metrics.go b/core/services/metrics.go new file mode 100644 index 00000000..b3107398 --- /dev/null +++ b/core/services/metrics.go @@ -0,0 +1,54 @@ +package services + +import ( + "context" + + "github.com/rs/zerolog/log" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/exporters/prometheus" + "go.opentelemetry.io/otel/metric" + metricApi "go.opentelemetry.io/otel/sdk/metric" +) + +type LocalAIMetricsService struct { + Meter metric.Meter + ApiTimeMetric metric.Float64Histogram +} + +func (m *LocalAIMetricsService) ObserveAPICall(method string, path string, duration float64) { + opts := metric.WithAttributes( + attribute.String("method", method), + attribute.String("path", path), + ) + m.ApiTimeMetric.Record(context.Background(), duration, opts) +} + +// setupOTelSDK bootstraps the OpenTelemetry pipeline. +// If it does not return an error, make sure to call shutdown for proper cleanup. +func NewLocalAIMetricsService() (*LocalAIMetricsService, error) { + exporter, err := prometheus.New() + if err != nil { + return nil, err + } + provider := metricApi.NewMeterProvider(metricApi.WithReader(exporter)) + meter := provider.Meter("github.com/go-skynet/LocalAI") + + apiTimeMetric, err := meter.Float64Histogram("api_call", metric.WithDescription("api calls")) + if err != nil { + return nil, err + } + + return &LocalAIMetricsService{ + Meter: meter, + ApiTimeMetric: apiTimeMetric, + }, nil +} + +func (lams LocalAIMetricsService) Shutdown() error { + // TODO: Not sure how to actually do this: + //// setupOTelSDK bootstraps the OpenTelemetry pipeline. + //// If it does not return an error, make sure to call shutdown for proper cleanup. 
+
+	log.Warn().Msgf("LocalAIMetricsService Shutdown called, but OTelSDK proper shutdown not yet implemented?")
+	return nil
+}
diff --git a/core/startup/config_file_watcher.go b/core/startup/config_file_watcher.go
new file mode 100644
index 00000000..0c7eff2d
--- /dev/null
+++ b/core/startup/config_file_watcher.go
@@ -0,0 +1,100 @@
+package startup
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path"
+
+	"github.com/fsnotify/fsnotify"
+	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/imdario/mergo"
+	"github.com/rs/zerolog/log"
+)
+
+type WatchConfigDirectoryCloser func() error
+
+func ReadApiKeysJson(configDir string, appConfig *config.ApplicationConfig) error {
+	fileContent, err := os.ReadFile(path.Join(configDir, "api_keys.json"))
+	if err != nil {
+		return err
+	}
+	// Parse JSON content from the file
+	var fileKeys []string
+	if err := json.Unmarshal(fileContent, &fileKeys); err != nil {
+		return err
+	}
+	appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...)
+	return nil
+}
+
+func ReadExternalBackendsJson(configDir string, appConfig *config.ApplicationConfig) error {
+	fileContent, err := os.ReadFile(path.Join(configDir, "external_backends.json"))
+	if err != nil {
+		return err
+	}
+	// Parse JSON content from the file
+	var fileBackends map[string]string
+	err = json.Unmarshal(fileContent, &fileBackends)
+	if err != nil {
+		return err
+	}
+	err = mergo.Merge(&appConfig.ExternalGRPCBackends, fileBackends)
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
+var CONFIG_FILE_UPDATES = map[string]func(configDir string, appConfig *config.ApplicationConfig) error{
+	"api_keys.json":          ReadApiKeysJson,
+	"external_backends.json": ReadExternalBackendsJson,
+}
+
+func WatchConfigDirectory(configDir string, appConfig *config.ApplicationConfig) (WatchConfigDirectoryCloser, error) {
+	if len(configDir) == 0 {
+		return nil, fmt.Errorf("configDir blank")
+	}
+	configWatcher, err := fsnotify.NewWatcher()
+	if err != nil {
+		// Return the error instead of exiting: callers of this function expect to handle failures themselves.
+		return nil, fmt.Errorf("unable to create a watcher for the LocalAI Configuration Directory: %w", err)
+	}
+	ret := func() error {
+		return configWatcher.Close()
+	}
+
+	// Start listening for events.
+	go func() {
+		for {
+			select {
+			case event, ok := <-configWatcher.Events:
+				if !ok {
+					return
+				}
+				if event.Has(fsnotify.Write) {
+					for targetName, watchFn := range CONFIG_FILE_UPDATES {
+						// event.Name carries the full path of the changed file, so compare only its base name.
+						if path.Base(event.Name) == targetName {
+							if err := watchFn(configDir, appConfig); err != nil {
+								log.Warn().Msgf("WatchConfigDirectory goroutine for %s: failed to update options: %+v", targetName, err)
+							}
+						}
+					}
+				}
+			case watchErr, ok := <-configWatcher.Errors:
+				if !ok {
+					return
+				}
+				log.Error().Msgf("WatchConfigDirectory goroutine error: %+v", watchErr)
+			}
+		}
+	}()
+
+	// Add a path.
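+	// Watching the directory (rather than individual files) means fsnotify also delivers events for files created inside it after the watch starts.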
+	err = configWatcher.Add(configDir)
+	if err != nil {
+		return ret, fmt.Errorf("unable to establish watch on the LocalAI Configuration Directory: %w", err)
+	}
+
+	return ret, nil
+}
diff --git a/core/startup/startup.go b/core/startup/startup.go
new file mode 100644
index 00000000..43e6646d
--- /dev/null
+++ b/core/startup/startup.go
@@ -0,0 +1,128 @@
+package startup
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/go-skynet/LocalAI/core/services"
+	"github.com/go-skynet/LocalAI/internal"
+	"github.com/go-skynet/LocalAI/pkg/assets"
+	"github.com/go-skynet/LocalAI/pkg/model"
+	pkgStartup "github.com/go-skynet/LocalAI/pkg/startup"
+	"github.com/rs/zerolog"
+	"github.com/rs/zerolog/log"
+)
+
+func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) {
+	options := config.NewApplicationConfig(opts...)
+
+	zerolog.SetGlobalLevel(zerolog.InfoLevel)
+	if options.Debug {
+		zerolog.SetGlobalLevel(zerolog.DebugLevel)
+	}
+
+	log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath)
+	log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
+
+	// Make sure directories exist
+	if options.ModelPath == "" {
+		return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty")
+	}
+	err := os.MkdirAll(options.ModelPath, 0755)
+	if err != nil {
+		return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %w", err)
+	}
+	if options.ImageDir != "" {
+		err := os.MkdirAll(options.ImageDir, 0755)
+		if err != nil {
+			return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %w", err)
+		}
+	}
+	if options.AudioDir != "" {
+		err := os.MkdirAll(options.AudioDir, 0755)
+		if err != nil {
+			return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %w", err)
+		}
+	}
+	if options.UploadDir != "" {
+		err := os.MkdirAll(options.UploadDir, 0755)
+		if err != nil {
+			return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %w", err)
+		}
+	}
+
+	// Download any models requested via remote URLs before the loaders scan the model path.
+	pkgStartup.PreloadModelsConfigurations(options.ModelLibraryURL, options.ModelPath, options.ModelsURL...)
+
+	cl := config.NewBackendConfigLoader()
+	ml := model.NewModelLoader(options.ModelPath)
+
+	if err := cl.LoadBackendConfigsFromPath(options.ModelPath); err != nil {
+		log.Error().Msgf("error loading config files: %s", err.Error())
+	}
+
+	if options.ConfigFile != "" {
+		if err := cl.LoadBackendConfigFile(options.ConfigFile); err != nil {
+			log.Error().Msgf("error loading config file: %s", err.Error())
+		}
+	}
+
+	if err := cl.Preload(options.ModelPath); err != nil {
+		log.Error().Msgf("error downloading models: %s", err.Error())
+	}
+
+	if options.PreloadJSONModels != "" {
+		if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, cl, options.Galleries); err != nil {
+			return nil, nil, nil, err
+		}
+	}
+
+	if options.PreloadModelsFromPath != "" {
+		if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, cl, options.Galleries); err != nil {
+			return nil, nil, nil, err
+		}
+	}
+
+	if options.Debug {
+		for _, v := range cl.ListBackendConfigs() {
+			cfg, _ := cl.GetBackendConfig(v)
+			log.Debug().Msgf("Model: %s (config: %+v)", v, cfg)
+		}
+	}
+
+	if options.AssetsDestination != "" {
+		// Extract files from the embedded FS
+		log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
+		err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
+		if err != nil {
+			log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
+		}
+	}
+
+	// stop any backend process started over gRPC once the context is canceled
+	go func() {
+		<-options.Context.Done()
+		log.Debug().Msgf("Context canceled, shutting down")
+		ml.StopAllGRPC()
+	}()
+
+	if options.WatchDog {
+		wd := model.NewWatchDog(
+			ml,
+			options.WatchDogBusyTimeout,
+			options.WatchDogIdleTimeout,
+			options.WatchDogBusy,
+			options.WatchDogIdle)
+		ml.SetWatchDog(wd)
+		go wd.Run()
+		go func() {
+			<-options.Context.Done()
+			log.Debug().Msgf("Context canceled, shutting down")
+			wd.Shutdown()
+		}()
+	}
+
+	log.Info().Msg("core/startup process completed!")
+	return cl, ml, options, nil
+}
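A short usage sketch of the refactored entrypoint above (illustrative, not part of this patch; the option values are placeholders): Startup now returns the backend config loader and the model loader alongside the resolved application config, rather than callers pulling them out of an options struct.

package main

import (
	"github.com/go-skynet/LocalAI/core/config"
	"github.com/go-skynet/LocalAI/core/startup"
	"github.com/rs/zerolog/log"
)

func main() {
	cl, ml, appConfig, err := startup.Startup(
		config.WithModelPath("./models"),
		config.WithContextSize(512),
		config.WithThreads(4),
	)
	if err != nil {
		log.Fatal().Msgf("startup failed: %s", err.Error())
	}
	defer ml.StopAllGRPC()

	_ = cl        // *config.BackendConfigLoader: per-model backend configs
	_ = appConfig // *config.ApplicationConfig: resolved global settings
}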
diff --git a/examples/bruno/LocalAI Test Requests/backend monitor/backend monitor.bru b/examples/bruno/LocalAI Test Requests/backend monitor/backend monitor.bru
index e3f72134..51e3771a 100644
--- a/examples/bruno/LocalAI Test Requests/backend monitor/backend monitor.bru
+++ b/examples/bruno/LocalAI Test Requests/backend monitor/backend monitor.bru
@@ -6,6 +6,12 @@ meta {
 get {
   url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/monitor
-  body: none
+  body: json
   auth: none
 }
+
+body:json {
+  {
+    "model": "{{DEFAULT_MODEL}}"
+  }
+}
diff --git a/examples/langchain/langchainjs-localai-example/src/index.mts b/examples/langchain/langchainjs-localai-example/src/index.mts
index e6dcfb86..11faa384 100644
--- a/examples/langchain/langchainjs-localai-example/src/index.mts
+++ b/examples/langchain/langchainjs-localai-example/src/index.mts
@@ -4,7 +4,7 @@ import { Document } from "langchain/document";
 import { initializeAgentExecutorWithOptions } from "langchain/agents";
 import {Calculator} from "langchain/tools/calculator";
 
-const pathToLocalAi = process.env['OPENAI_API_BASE'] || 'http://api:8080/v1';
+const pathToLocalAI = process.env['OPENAI_API_BASE'] || 'http://api:8080/v1';
 const fakeApiKey = process.env['OPENAI_API_KEY'] || '-';
 const modelName = process.env['MODEL_NAME'] || 'gpt-3.5-turbo';
 
@@ -21,7 +21,7 @@ function getModel(): OpenAIChat {
     openAIApiKey: fakeApiKey,
     maxRetries: 2
   }, {
-    basePath: pathToLocalAi,
+    basePath: pathToLocalAI,
     apiKey: fakeApiKey,
   });
 }
diff --git a/go.mod b/go.mod
index bbd787b5..bbb90838 100644
--- a/go.mod
+++ b/go.mod
@@ -5,6 +5,7 @@ go 1.21
 require (
 	github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf
 	github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df
+	github.com/fsnotify/fsnotify v1.7.0
 	github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e
 	github.com/go-audio/wav v1.1.0
 	github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1
@@ -14,7 +15,6 @@ require (
 	github.com/hashicorp/go-multierror v1.1.1
 	github.com/hpcloud/tail v1.0.0
 	github.com/imdario/mergo v0.3.16
-	github.com/json-iterator/go v1.1.12
 	github.com/mholt/archiver/v3 v3.5.1
 	github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c
 	github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af
@@ -64,8 +64,6 @@ require (
 	github.com/klauspost/pgzip v1.2.5 // indirect
 	github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
 	github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
-	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
-	github.com/modern-go/reflect2 v1.0.2 // indirect
 	github.com/nwaples/rardecode v1.1.0 // indirect
 	github.com/pierrec/lz4/v4 v4.1.2 // indirect
 	github.com/pkoukk/tiktoken-go v0.1.2 // indirect
@@ -104,7 +102,7 @@ require (
 	github.com/valyala/tcplisten v1.0.0 // indirect
 	github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
 	golang.org/x/net v0.17.0 // indirect
-	golang.org/x/sys v0.13.0 // indirect
+	golang.org/x/sys v0.17.0 // indirect
 	golang.org/x/text v0.13.0 // indirect
 	golang.org/x/tools v0.12.0 // indirect
 )
diff --git a/go.sum b/go.sum
index 20dfbfb4..84aba3a0 100644
--- a/go.sum
+++ b/go.sum
@@ -26,6 +26,8 @@ github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdf
 github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
 github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
 github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
+github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
+github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e h1:KtbU2JR3lJuXFASHG2+sVLucfMPBjWKUUKByX6C81mQ=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
 github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
@@ -72,7 +74,6 @@ github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
 github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
 github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
-github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
 github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
 github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4=
@@ -86,8 +87,6 @@ github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpO
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= -github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= -github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw= github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= @@ -117,11 +116,6 @@ github.com/mholt/archiver/v3 v3.5.1 h1:rDjOBX9JSF5BvoJGvjqK479aL70qh9DIpZCl+k7Cl github.com/mholt/archiver/v3 v3.5.1/go.mod h1:e3dqJ7H78uzsRSEACH1joayhuSyhnonssnDhppzS1L4= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= -github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= -github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= -github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 h1:OFVkSxR7CRSRSNm5dvpMRZwmSwWa8EMMnHbc84fW5tU= github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig= github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGwqBpN8N7BrSKC+nmdfw+9nPQIDyjHHlaIiitZI= @@ -278,6 +272,8 @@ golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= diff --git a/main.go b/main.go index 7e4262ee..237191cf 100644 --- a/main.go +++ b/main.go @@ -13,11 +13,12 @@ import ( "time" "github.com/go-skynet/LocalAI/core/backend" - config "github.com/go-skynet/LocalAI/core/config" - api "github.com/go-skynet/LocalAI/core/http" - "github.com/go-skynet/LocalAI/core/options" + "github.com/go-skynet/LocalAI/core/config" + + "github.com/go-skynet/LocalAI/core/http" + "github.com/go-skynet/LocalAI/core/startup" + "github.com/go-skynet/LocalAI/internal" - "github.com/go-skynet/LocalAI/metrics" "github.com/go-skynet/LocalAI/pkg/gallery" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/rs/zerolog" @@ -206,6 +207,12 @@ func main() { EnvVars: 
[]string{"PRELOAD_BACKEND_ONLY"}, Value: false, }, + &cli.StringFlag{ + Name: "localai-config-dir", + Usage: "Directory to use for the configuration files of LocalAI itself. This is NOT where model files should be placed.", + EnvVars: []string{"LOCALAI_CONFIG_DIR"}, + Value: "./configuration", + }, }, Description: ` LocalAI is a drop-in replacement OpenAI API which runs inference locally. @@ -224,56 +231,56 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit UsageText: `local-ai [options]`, Copyright: "Ettore Di Giacinto", Action: func(ctx *cli.Context) error { - opts := []options.AppOption{ - options.WithConfigFile(ctx.String("config-file")), - options.WithJSONStringPreload(ctx.String("preload-models")), - options.WithYAMLConfigPreload(ctx.String("preload-models-config")), - options.WithModelLoader(model.NewModelLoader(ctx.String("models-path"))), - options.WithContextSize(ctx.Int("context-size")), - options.WithDebug(ctx.Bool("debug")), - options.WithImageDir(ctx.String("image-path")), - options.WithAudioDir(ctx.String("audio-path")), - options.WithUploadDir(ctx.String("upload-path")), - options.WithF16(ctx.Bool("f16")), - options.WithStringGalleries(ctx.String("galleries")), - options.WithModelLibraryURL(ctx.String("remote-library")), - options.WithDisableMessage(false), - options.WithCors(ctx.Bool("cors")), - options.WithCorsAllowOrigins(ctx.String("cors-allow-origins")), - options.WithThreads(ctx.Int("threads")), - options.WithBackendAssets(backendAssets), - options.WithBackendAssetsOutput(ctx.String("backend-assets-path")), - options.WithUploadLimitMB(ctx.Int("upload-limit")), - options.WithApiKeys(ctx.StringSlice("api-keys")), - options.WithModelsURL(append(ctx.StringSlice("models"), ctx.Args().Slice()...)...), + opts := []config.AppOption{ + config.WithConfigFile(ctx.String("config-file")), + config.WithJSONStringPreload(ctx.String("preload-models")), + config.WithYAMLConfigPreload(ctx.String("preload-models-config")), + config.WithModelPath(ctx.String("models-path")), + config.WithContextSize(ctx.Int("context-size")), + config.WithDebug(ctx.Bool("debug")), + config.WithImageDir(ctx.String("image-path")), + config.WithAudioDir(ctx.String("audio-path")), + config.WithUploadDir(ctx.String("upload-path")), + config.WithF16(ctx.Bool("f16")), + config.WithStringGalleries(ctx.String("galleries")), + config.WithModelLibraryURL(ctx.String("remote-library")), + config.WithDisableMessage(false), + config.WithCors(ctx.Bool("cors")), + config.WithCorsAllowOrigins(ctx.String("cors-allow-origins")), + config.WithThreads(ctx.Int("threads")), + config.WithBackendAssets(backendAssets), + config.WithBackendAssetsOutput(ctx.String("backend-assets-path")), + config.WithUploadLimitMB(ctx.Int("upload-limit")), + config.WithApiKeys(ctx.StringSlice("api-keys")), + config.WithModelsURL(append(ctx.StringSlice("models"), ctx.Args().Slice()...)...), } idleWatchDog := ctx.Bool("enable-watchdog-idle") busyWatchDog := ctx.Bool("enable-watchdog-busy") if idleWatchDog || busyWatchDog { - opts = append(opts, options.EnableWatchDog) + opts = append(opts, config.EnableWatchDog) if idleWatchDog { - opts = append(opts, options.EnableWatchDogIdleCheck) + opts = append(opts, config.EnableWatchDogIdleCheck) dur, err := time.ParseDuration(ctx.String("watchdog-idle-timeout")) if err != nil { return err } - opts = append(opts, options.SetWatchDogIdleTimeout(dur)) + opts = append(opts, config.SetWatchDogIdleTimeout(dur)) } if busyWatchDog { - opts = append(opts, options.EnableWatchDogBusyCheck) 
+				opts = append(opts, config.EnableWatchDogBusyCheck)
 					dur, err := time.ParseDuration(ctx.String("watchdog-busy-timeout"))
 					if err != nil {
 						return err
 					}
-					opts = append(opts, options.SetWatchDogBusyTimeout(dur))
+					opts = append(opts, config.SetWatchDogBusyTimeout(dur))
 				}
 			}
 			if ctx.Bool("parallel-requests") {
-				opts = append(opts, options.EnableParallelBackendRequests)
+				opts = append(opts, config.EnableParallelBackendRequests)
 			}
 			if ctx.Bool("single-active-backend") {
-				opts = append(opts, options.EnableSingleBackend)
+				opts = append(opts, config.EnableSingleBackend)
 			}
 
 			externalgRPC := ctx.StringSlice("external-grpc-backends")
@@ -281,30 +288,38 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
 			for _, v := range externalgRPC {
 				backend := v[:strings.IndexByte(v, ':')]
 				uri := v[strings.IndexByte(v, ':')+1:]
-				opts = append(opts, options.WithExternalBackend(backend, uri))
+				opts = append(opts, config.WithExternalBackend(backend, uri))
 			}
 
 			if ctx.Bool("autoload-galleries") {
-				opts = append(opts, options.EnableGalleriesAutoload)
+				opts = append(opts, config.EnableGalleriesAutoload)
 			}
 
 			if ctx.Bool("preload-backend-only") {
-				_, _, err := api.Startup(opts...)
+				_, _, _, err := startup.Startup(opts...)
 				return err
 			}
 
-			metrics, err := metrics.SetupMetrics()
+			cl, ml, options, err := startup.Startup(opts...)
+
 			if err != nil {
-				return err
+				return fmt.Errorf("failed basic startup tasks: %w", err)
 			}
-			opts = append(opts, options.WithMetrics(metrics))
 
-			app, err := api.App(opts...)
+			closeConfigWatcherFn, err := startup.WatchConfigDirectory(ctx.String("localai-config-dir"), options)
+			if err != nil {
+				return fmt.Errorf("failed while watching configuration directory %s: %w", ctx.String("localai-config-dir"), err)
+			}
+			defer closeConfigWatcherFn()
+
+			appHTTP, err := http.App(cl, ml, options)
+			if err != nil {
+				log.Error().Msg("Error during HTTP App constructor")
 				return err
 			}
 
-			return app.Listen(ctx.String("address"))
+			return appHTTP.Listen(ctx.String("address"))
 		},
 		Commands: []*cli.Command{
 			{
@@ -402,16 +417,17 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
 				text := strings.Join(ctx.Args().Slice(), " ")
 
-				opts := &options.Option{
-					Loader:            model.NewModelLoader(ctx.String("models-path")),
+				opts := &config.ApplicationConfig{
+					ModelPath:         ctx.String("models-path"),
 					Context:           context.Background(),
 					AudioDir:          outputDir,
 					AssetsDestination: ctx.String("backend-assets-path"),
 				}
+				ml := model.NewModelLoader(opts.ModelPath)
 
-				defer opts.Loader.StopAllGRPC()
+				defer ml.StopAllGRPC()
 
-				filePath, _, err := backend.ModelTTS(backendOption, text, modelOption, opts.Loader, opts, config.Config{})
+				filePath, _, err := backend.ModelTTS(backendOption, text, modelOption, ml, opts, config.BackendConfig{})
 				if err != nil {
 					return err
 				}
@@ -464,27 +480,28 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
 				language := ctx.String("language")
 				threads := ctx.Int("threads")
 
-				opts := &options.Option{
-					Loader:            model.NewModelLoader(ctx.String("models-path")),
+				opts := &config.ApplicationConfig{
+					ModelPath:         ctx.String("models-path"),
 					Context:           context.Background(),
 					AssetsDestination: ctx.String("backend-assets-path"),
 				}
 
-				cl := config.NewConfigLoader()
-				if err := cl.LoadConfigs(ctx.String("models-path")); err != nil {
+				cl := config.NewBackendConfigLoader()
+				ml := model.NewModelLoader(opts.ModelPath)
+				if err := cl.LoadBackendConfigsFromPath(ctx.String("models-path")); err != nil {
 					return err
 				}
-				c, exists := 
cl.GetConfig(modelOption) + c, exists := cl.GetBackendConfig(modelOption) if !exists { return errors.New("model not found") } c.Threads = threads - defer opts.Loader.StopAllGRPC() + defer ml.StopAllGRPC() - tr, err := backend.ModelTranscription(filename, language, opts.Loader, c, opts) + tr, err := backend.ModelTranscription(filename, language, ml, c, opts) if err != nil { return err } diff --git a/metrics/metrics.go b/metrics/metrics.go deleted file mode 100644 index 84b83161..00000000 --- a/metrics/metrics.go +++ /dev/null @@ -1,83 +0,0 @@ -package metrics - -import ( - "context" - "time" - - "github.com/gofiber/fiber/v2" - "github.com/gofiber/fiber/v2/middleware/adaptor" - "github.com/prometheus/client_golang/prometheus/promhttp" - "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/exporters/prometheus" - api "go.opentelemetry.io/otel/metric" - "go.opentelemetry.io/otel/sdk/metric" -) - -type Metrics struct { - meter api.Meter - apiTimeMetric api.Float64Histogram -} - -// setupOTelSDK bootstraps the OpenTelemetry pipeline. -// If it does not return an error, make sure to call shutdown for proper cleanup. -func SetupMetrics() (*Metrics, error) { - exporter, err := prometheus.New() - if err != nil { - return nil, err - } - provider := metric.NewMeterProvider(metric.WithReader(exporter)) - meter := provider.Meter("github.com/go-skynet/LocalAI") - - apiTimeMetric, err := meter.Float64Histogram("api_call", api.WithDescription("api calls")) - if err != nil { - return nil, err - } - - return &Metrics{ - meter: meter, - apiTimeMetric: apiTimeMetric, - }, nil -} - -func MetricsHandler() fiber.Handler { - return adaptor.HTTPHandler(promhttp.Handler()) -} - -type apiMiddlewareConfig struct { - Filter func(c *fiber.Ctx) bool - metrics *Metrics -} - -func APIMiddleware(metrics *Metrics) fiber.Handler { - cfg := apiMiddlewareConfig{ - metrics: metrics, - Filter: func(c *fiber.Ctx) bool { - if c.Path() == "/metrics" { - return true - } - return false - }, - } - - return func(c *fiber.Ctx) error { - if cfg.Filter != nil && cfg.Filter(c) { - return c.Next() - } - path := c.Path() - method := c.Method() - - start := time.Now() - err := c.Next() - elapsed := float64(time.Since(start)) / float64(time.Second) - cfg.metrics.ObserveAPICall(method, path, elapsed) - return err - } -} - -func (m *Metrics) ObserveAPICall(method string, path string, duration float64) { - opts := api.WithAttributes( - attribute.String("method", method), - attribute.String("path", path), - ) - m.apiTimeMetric.Record(context.Background(), duration, opts) -} diff --git a/pkg/downloader/uri.go b/pkg/downloader/uri.go index 80214f5b..b678ae0d 100644 --- a/pkg/downloader/uri.go +++ b/pkg/downloader/uri.go @@ -179,6 +179,10 @@ func DownloadFile(url string, filePath, sha string, downloadStatus func(string, } defer resp.Body.Close() + if resp.StatusCode >= 400 { + return fmt.Errorf("failed to download url %q, invalid status code %d", url, resp.StatusCode) + } + // Create parent directory err = os.MkdirAll(filepath.Dir(filePath), 0755) if err != nil { diff --git a/pkg/gallery/models_test.go b/pkg/gallery/models_test.go index f454c611..21d3a03d 100644 --- a/pkg/gallery/models_test.go +++ b/pkg/gallery/models_test.go @@ -18,7 +18,6 @@ var _ = Describe("Model test", func() { defer os.RemoveAll(tempdir) c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml")) Expect(err).ToNot(HaveOccurred()) - err = InstallModel(tempdir, "", c, map[string]interface{}{}, func(string, string, string, float64) {}) 
Expect(err).ToNot(HaveOccurred()) diff --git a/pkg/gallery/op.go b/pkg/gallery/op.go new file mode 100644 index 00000000..873c356d --- /dev/null +++ b/pkg/gallery/op.go @@ -0,0 +1,18 @@ +package gallery + +type GalleryOp struct { + Req GalleryModel + Id string + Galleries []Gallery + GalleryName string +} + +type GalleryOpStatus struct { + FileName string `json:"file_name"` + Error error `json:"error"` + Processed bool `json:"processed"` + Message string `json:"message"` + Progress float64 `json:"progress"` + TotalFileSize string `json:"file_size"` + DownloadedFileSize string `json:"downloaded_size"` +} diff --git a/tests/integration/reflect_test.go b/tests/integration/reflect_test.go index bf3f8a5b..5fd60114 100644 --- a/tests/integration/reflect_test.go +++ b/tests/integration/reflect_test.go @@ -3,7 +3,7 @@ package integration_test import ( "reflect" - config "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/config" model "github.com/go-skynet/LocalAI/pkg/model" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" From 939411300ab55cc84690e62442b51ab0f2c9de3b Mon Sep 17 00:00:00 2001 From: Ludovic Leroux Date: Fri, 1 Mar 2024 16:48:53 -0500 Subject: [PATCH 0110/2895] Bump vLLM version + more options when loading models in vLLM (#1782) * Bump vLLM version to 0.3.2 * Add vLLM model loading options * Remove transformers-exllama * Fix install exllama --- backend/backend.proto | 5 + backend/python/autogptq/autogptq.yml | 2 +- backend/python/autogptq/backend_pb2.py | 66 ++-- backend/python/bark/backend_pb2.py | 66 ++-- .../transformers/transformers-nvidia.yml | 6 +- .../transformers/transformers-rocm.yml | 6 +- .../common-env/transformers/transformers.yml | 6 +- backend/python/coqui/backend_pb2.py | 66 ++-- backend/python/diffusers/backend_pb2.py | 66 ++-- backend/python/exllama/Makefile | 5 +- backend/python/exllama/backend_pb2.py | 66 ++-- backend/python/exllama/install.sh | 18 +- backend/python/exllama/run.sh | 3 +- backend/python/exllama2/backend_pb2.py | 66 ++-- backend/python/mamba/backend_pb2.py | 66 ++-- backend/python/petals/backend_pb2.py | 66 ++-- .../sentencetransformers/backend_pb2.py | 66 ++-- .../transformers-musicgen/backend_pb2.py | 66 ++-- backend/python/transformers/backend_pb2.py | 66 ++-- backend/python/vall-e-x/backend_pb2.py | 66 ++-- backend/python/vall-e-x/ttsvalle.yml | 2 +- backend/python/vllm/backend_pb2.py | 66 ++-- backend/python/vllm/backend_vllm.py | 10 + core/backend/options.go | 69 ++-- core/config/backend_config.go | 25 +- docs/content/docs/features/text-generation.md | 12 +- pkg/grpc/proto/backend.pb.go | 348 ++++++++++-------- pkg/grpc/proto/backend_grpc.pb.go | 2 +- 28 files changed, 736 insertions(+), 641 deletions(-) diff --git a/backend/backend.proto b/backend/backend.proto index e9989aec..a82db555 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -126,6 +126,11 @@ message ModelOptions { // vllm string Quantization = 40; + float GPUMemoryUtilization = 50; + bool TrustRemoteCode = 51; + bool EnforceEager = 52; + int32 SwapSpace = 53; + int32 MaxModelLen = 54; string MMProj = 41; diff --git a/backend/python/autogptq/autogptq.yml b/backend/python/autogptq/autogptq.yml index 7c8b4407..19b8e41d 100644 --- a/backend/python/autogptq/autogptq.yml +++ b/backend/python/autogptq/autogptq.yml @@ -71,7 +71,7 @@ dependencies: - regex==2023.10.3 - requests==2.31.0 - rouge==1.0.1 - - safetensors==0.3.3 + - safetensors>=0.3.3 - six==1.16.0 - sympy==1.12 - tokenizers==0.14.0 diff --git 
a/backend/python/autogptq/backend_pb2.py b/backend/python/autogptq/backend_pb2.py index be6191b1..b101e4f4 100644 --- a/backend/python/autogptq/backend_pb2.py +++ b/backend/python/autogptq/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,17 +14,16 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n 
\x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + 
_globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_REPLY']._serialized_start=852 _globals['_REPLY']._serialized_end=876 _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1834 - _globals['_RESULT']._serialized_start=1836 - _globals['_RESULT']._serialized_end=1878 - _globals['_EMBEDDINGRESULT']._serialized_start=1880 - _globals['_EMBEDDINGRESULT']._serialized_end=1917 - _globals['_TRANSCRIPTREQUEST']._serialized_start=1919 - _globals['_TRANSCRIPTREQUEST']._serialized_end=1986 - _globals['_TRANSCRIPTRESULT']._serialized_start=1988 - _globals['_TRANSCRIPTRESULT']._serialized_end=2066 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375 - _globals['_TTSREQUEST']._serialized_start=2377 - _globals['_TTSREQUEST']._serialized_end=2431 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487 - _globals['_MEMORYUSAGEDATA']._serialized_start=2490 - _globals['_MEMORYUSAGEDATA']._serialized_end=2632 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632 - _globals['_STATUSRESPONSE']._serialized_start=2635 - _globals['_STATUSRESPONSE']._serialized_end=2808 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2741 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2808 - _globals['_BACKEND']._serialized_start=2811 - _globals['_BACKEND']._serialized_end=3439 + _globals['_MODELOPTIONS']._serialized_end=1951 + _globals['_RESULT']._serialized_start=1953 + _globals['_RESULT']._serialized_end=1995 + _globals['_EMBEDDINGRESULT']._serialized_start=1997 + _globals['_EMBEDDINGRESULT']._serialized_end=2034 + _globals['_TRANSCRIPTREQUEST']._serialized_start=2036 + _globals['_TRANSCRIPTREQUEST']._serialized_end=2103 + _globals['_TRANSCRIPTRESULT']._serialized_start=2105 + _globals['_TRANSCRIPTRESULT']._serialized_end=2183 + _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185 + _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274 + _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 + _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 + _globals['_TTSREQUEST']._serialized_start=2494 + _globals['_TTSREQUEST']._serialized_end=2548 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604 + _globals['_MEMORYUSAGEDATA']._serialized_start=2607 + _globals['_MEMORYUSAGEDATA']._serialized_end=2749 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749 + _globals['_STATUSRESPONSE']._serialized_start=2752 + _globals['_STATUSRESPONSE']._serialized_end=2925 + _globals['_STATUSRESPONSE_STATE']._serialized_start=2858 + _globals['_STATUSRESPONSE_STATE']._serialized_end=2925 + _globals['_BACKEND']._serialized_start=2928 + _globals['_BACKEND']._serialized_end=3556 # 
@@protoc_insertion_point(module_scope) diff --git a/backend/python/bark/backend_pb2.py b/backend/python/bark/backend_pb2.py index be6191b1..b101e4f4 100644 --- a/backend/python/bark/backend_pb2.py +++ b/backend/python/bark/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,17 +14,16 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n 
\x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + 
_globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
   _globals['_HEALTHMESSAGE']._serialized_start=26
   _globals['_HEALTHMESSAGE']._serialized_end=41
   _globals['_PREDICTOPTIONS']._serialized_start=44
@@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False:
   _globals['_REPLY']._serialized_start=852
   _globals['_REPLY']._serialized_end=876
   _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1834
-  _globals['_RESULT']._serialized_start=1836
-  _globals['_RESULT']._serialized_end=1878
-  _globals['_EMBEDDINGRESULT']._serialized_start=1880
-  _globals['_EMBEDDINGRESULT']._serialized_end=1917
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=1919
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=1986
-  _globals['_TRANSCRIPTRESULT']._serialized_start=1988
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2066
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375
-  _globals['_TTSREQUEST']._serialized_start=2377
-  _globals['_TTSREQUEST']._serialized_end=2431
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2490
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2632
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632
-  _globals['_STATUSRESPONSE']._serialized_start=2635
-  _globals['_STATUSRESPONSE']._serialized_end=2808
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
-  _globals['_BACKEND']._serialized_start=2811
-  _globals['_BACKEND']._serialized_end=3439
+  _globals['_MODELOPTIONS']._serialized_end=1951
+  _globals['_RESULT']._serialized_start=1953
+  _globals['_RESULT']._serialized_end=1995
+  _globals['_EMBEDDINGRESULT']._serialized_start=1997
+  _globals['_EMBEDDINGRESULT']._serialized_end=2034
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
+  _globals['_TTSREQUEST']._serialized_start=2494
+  _globals['_TTSREQUEST']._serialized_end=2548
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2607
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2749
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749
+  _globals['_STATUSRESPONSE']._serialized_start=2752
+  _globals['_STATUSRESPONSE']._serialized_end=2925
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2858
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2925
+  _globals['_BACKEND']._serialized_start=2928
+  _globals['_BACKEND']._serialized_end=3556
 # @@protoc_insertion_point(module_scope)
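The backend_pb2.py changes in this patch (here and again below for the coqui, diffusers, exllama and exllama2 backends) are regenerated protoc output rather than hand edits: adding the vLLM fields (GPUMemoryUtilization, TrustRemoteCode, EnforceEager, SwapSpace, MaxModelLen) to ModelOptions shifts every later serialized offset, and newer grpcio-tools also stamps the protobuf version and addresses options through _globals. A minimal regeneration sketch, assuming grpcio-tools with protobuf 4.25.1 is installed and that backend.proto sits in pkg/grpc/proto; the paths and output directory are illustrative, not the repository's actual protogen target:

    # regen_pb2.py -- illustrative sketch only; the repository's own protogen
    # tooling is authoritative for the exact include paths and output layout.
    from grpc_tools import protoc  # pip install grpcio-tools

    ret = protoc.main([
        "protoc",                                  # argv[0] placeholder expected by protoc.main
        "-Ipkg/grpc/proto",                        # directory containing backend.proto (assumed)
        "--python_out=backend/python/coqui",       # writes backend_pb2.py
        "--grpc_python_out=backend/python/coqui",  # writes backend_pb2_grpc.py
        "pkg/grpc/proto/backend.proto",
    ])
    raise SystemExit(ret)  # protoc.main returns a non-zero code on failure

Run against the updated backend.proto with the same protobuf version, this should reproduce the descriptor string and the _serialized_start/_serialized_end table in the hunks above and below.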
diff --git a/backend/python/common-env/transformers/transformers-nvidia.yml b/backend/python/common-env/transformers/transformers-nvidia.yml
index d5fe07b4..3565d4ad 100644
--- a/backend/python/common-env/transformers/transformers-nvidia.yml
+++ b/backend/python/common-env/transformers/transformers-nvidia.yml
@@ -81,7 +81,7 @@ dependencies:
   - requests==2.31.0
   - rouge==1.0.1
   - s3transfer==0.7.0
-  - safetensors==0.3.3
+  - safetensors>=0.4.1
   - scipy==1.11.3
   - six==1.16.0
   - sympy==1.12
@@ -113,7 +113,7 @@ dependencies:
   - sudachipy
   - sudachidict_core
   - vocos
-  - vllm==0.2.7
-  - transformers>=4.36.0 # Required for Mixtral.
+  - vllm==0.3.2
+  - transformers>=4.38.0 # Required for Gemma.
   - xformers==0.0.23.post1
 prefix: /opt/conda/envs/transformers
diff --git a/backend/python/common-env/transformers/transformers-rocm.yml b/backend/python/common-env/transformers/transformers-rocm.yml
index 1f5d2236..6e0dc4ce 100644
--- a/backend/python/common-env/transformers/transformers-rocm.yml
+++ b/backend/python/common-env/transformers/transformers-rocm.yml
@@ -71,7 +71,7 @@ dependencies:
   - requests==2.31.0
   - rouge==1.0.1
   - s3transfer==0.7.0
-  - safetensors==0.3.3
+  - safetensors>=0.4.1
   - scipy==1.11.3
   - six==1.16.0
   - sympy==1.12
@@ -103,7 +103,7 @@ dependencies:
   - sudachipy
   - sudachidict_core
   - vocos
-  - vllm==0.2.7
-  - transformers>=4.36.0 # Required for Mixtral.
+  - vllm==0.3.2
+  - transformers>=4.38.0 # Required for Gemma.
   - xformers==0.0.23.post1
 prefix: /opt/conda/envs/transformers
diff --git a/backend/python/common-env/transformers/transformers.yml b/backend/python/common-env/transformers/transformers.yml
index 2dc177c2..44e6efd6 100644
--- a/backend/python/common-env/transformers/transformers.yml
+++ b/backend/python/common-env/transformers/transformers.yml
@@ -69,7 +69,7 @@ dependencies:
   - requests==2.31.0
   - rouge==1.0.1
   - s3transfer==0.7.0
-  - safetensors==0.3.3
+  - safetensors>=0.4.1
   - scipy==1.11.3
   - six==1.16.0
   - sympy==1.12
@@ -101,7 +101,7 @@ dependencies:
   - sudachipy
   - sudachidict_core
   - vocos
-  - vllm==0.2.7
-  - transformers>=4.36.0 # Required for Mixtral.
+  - vllm==0.3.2
+  - transformers>=4.38.0 # Required for Gemma.
   - xformers==0.0.23.post1
 prefix: /opt/conda/envs/transformers
\ No newline at end of file
diff --git a/backend/python/coqui/backend_pb2.py b/backend/python/coqui/backend_pb2.py
index be6191b1..b101e4f4 100644
--- a/backend/python/coqui/backend_pb2.py
+++ b/backend/python/coqui/backend_pb2.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,17 +14,16 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_REPLY']._serialized_start=852 _globals['_REPLY']._serialized_end=876 _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1834 - _globals['_RESULT']._serialized_start=1836 - _globals['_RESULT']._serialized_end=1878 - _globals['_EMBEDDINGRESULT']._serialized_start=1880 - _globals['_EMBEDDINGRESULT']._serialized_end=1917 - _globals['_TRANSCRIPTREQUEST']._serialized_start=1919 - _globals['_TRANSCRIPTREQUEST']._serialized_end=1986 - _globals['_TRANSCRIPTRESULT']._serialized_start=1988 - _globals['_TRANSCRIPTRESULT']._serialized_end=2066 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375 - _globals['_TTSREQUEST']._serialized_start=2377 - _globals['_TTSREQUEST']._serialized_end=2431 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487 - _globals['_MEMORYUSAGEDATA']._serialized_start=2490 - _globals['_MEMORYUSAGEDATA']._serialized_end=2632 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632 - 
_globals['_STATUSRESPONSE']._serialized_start=2635
-  _globals['_STATUSRESPONSE']._serialized_end=2808
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
-  _globals['_BACKEND']._serialized_start=2811
-  _globals['_BACKEND']._serialized_end=3439
+  _globals['_MODELOPTIONS']._serialized_end=1951
+  _globals['_RESULT']._serialized_start=1953
+  _globals['_RESULT']._serialized_end=1995
+  _globals['_EMBEDDINGRESULT']._serialized_start=1997
+  _globals['_EMBEDDINGRESULT']._serialized_end=2034
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
+  _globals['_TTSREQUEST']._serialized_start=2494
+  _globals['_TTSREQUEST']._serialized_end=2548
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2607
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2749
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749
+  _globals['_STATUSRESPONSE']._serialized_start=2752
+  _globals['_STATUSRESPONSE']._serialized_end=2925
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2858
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2925
+  _globals['_BACKEND']._serialized_start=2928
+  _globals['_BACKEND']._serialized_end=3556
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/diffusers/backend_pb2.py b/backend/python/diffusers/backend_pb2.py
index be6191b1..b101e4f4 100644
--- a/backend/python/diffusers/backend_pb2.py
+++ b/backend/python/diffusers/backend_pb2.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,17 +14,16 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_REPLY']._serialized_start=852 _globals['_REPLY']._serialized_end=876 _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1834 - _globals['_RESULT']._serialized_start=1836 - _globals['_RESULT']._serialized_end=1878 - _globals['_EMBEDDINGRESULT']._serialized_start=1880 - _globals['_EMBEDDINGRESULT']._serialized_end=1917 - _globals['_TRANSCRIPTREQUEST']._serialized_start=1919 - _globals['_TRANSCRIPTREQUEST']._serialized_end=1986 - _globals['_TRANSCRIPTRESULT']._serialized_start=1988 - _globals['_TRANSCRIPTRESULT']._serialized_end=2066 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375 - _globals['_TTSREQUEST']._serialized_start=2377 - _globals['_TTSREQUEST']._serialized_end=2431 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487 - _globals['_MEMORYUSAGEDATA']._serialized_start=2490 - _globals['_MEMORYUSAGEDATA']._serialized_end=2632 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632 - 
_globals['_STATUSRESPONSE']._serialized_start=2635
-  _globals['_STATUSRESPONSE']._serialized_end=2808
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
-  _globals['_BACKEND']._serialized_start=2811
-  _globals['_BACKEND']._serialized_end=3439
+  _globals['_MODELOPTIONS']._serialized_end=1951
+  _globals['_RESULT']._serialized_start=1953
+  _globals['_RESULT']._serialized_end=1995
+  _globals['_EMBEDDINGRESULT']._serialized_start=1997
+  _globals['_EMBEDDINGRESULT']._serialized_end=2034
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
+  _globals['_TTSREQUEST']._serialized_start=2494
+  _globals['_TTSREQUEST']._serialized_end=2548
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2607
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2749
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749
+  _globals['_STATUSRESPONSE']._serialized_start=2752
+  _globals['_STATUSRESPONSE']._serialized_end=2925
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2858
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2925
+  _globals['_BACKEND']._serialized_start=2928
+  _globals['_BACKEND']._serialized_end=3556
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/exllama/Makefile b/backend/python/exllama/Makefile
index 52a180ae..b51adf76 100644
--- a/backend/python/exllama/Makefile
+++ b/backend/python/exllama/Makefile
@@ -1,7 +1,8 @@
+export CONDA_ENV_PATH = "exllama.yml"
+
 .PHONY: exllama
 exllama:
-	$(MAKE) -C ../common-env/transformers
-	bash install.sh
+	bash install.sh ${CONDA_ENV_PATH}
 
 .PHONY: run
 run:
diff --git a/backend/python/exllama/backend_pb2.py b/backend/python/exllama/backend_pb2.py
index be6191b1..b101e4f4 100644
--- a/backend/python/exllama/backend_pb2.py
+++ b/backend/python/exllama/backend_pb2.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,17 +14,16 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_REPLY']._serialized_start=852 _globals['_REPLY']._serialized_end=876 _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1834 - _globals['_RESULT']._serialized_start=1836 - _globals['_RESULT']._serialized_end=1878 - _globals['_EMBEDDINGRESULT']._serialized_start=1880 - _globals['_EMBEDDINGRESULT']._serialized_end=1917 - _globals['_TRANSCRIPTREQUEST']._serialized_start=1919 - _globals['_TRANSCRIPTREQUEST']._serialized_end=1986 - _globals['_TRANSCRIPTRESULT']._serialized_start=1988 - _globals['_TRANSCRIPTRESULT']._serialized_end=2066 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375 - _globals['_TTSREQUEST']._serialized_start=2377 - _globals['_TTSREQUEST']._serialized_end=2431 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487 - _globals['_MEMORYUSAGEDATA']._serialized_start=2490 - _globals['_MEMORYUSAGEDATA']._serialized_end=2632 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632 - 
_globals['_STATUSRESPONSE']._serialized_start=2635
-  _globals['_STATUSRESPONSE']._serialized_end=2808
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
-  _globals['_BACKEND']._serialized_start=2811
-  _globals['_BACKEND']._serialized_end=3439
+  _globals['_MODELOPTIONS']._serialized_end=1951
+  _globals['_RESULT']._serialized_start=1953
+  _globals['_RESULT']._serialized_end=1995
+  _globals['_EMBEDDINGRESULT']._serialized_start=1997
+  _globals['_EMBEDDINGRESULT']._serialized_end=2034
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
+  _globals['_TTSREQUEST']._serialized_start=2494
+  _globals['_TTSREQUEST']._serialized_end=2548
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2607
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2749
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749
+  _globals['_STATUSRESPONSE']._serialized_start=2752
+  _globals['_STATUSRESPONSE']._serialized_end=2925
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2858
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2925
+  _globals['_BACKEND']._serialized_start=2928
+  _globals['_BACKEND']._serialized_end=3556
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/exllama/install.sh b/backend/python/exllama/install.sh
index 1be2d05c..702bb1fb 100755
--- a/backend/python/exllama/install.sh
+++ b/backend/python/exllama/install.sh
@@ -1,14 +1,22 @@
 #!/bin/bash
+set -ex
-##
-## A bash script installs the required dependencies of VALL-E-X and prepares the environment
 export PATH=$PATH:/opt/conda/bin
-# Activate conda environment
-source activate transformers
+# Succeeds when the named conda environment does NOT exist
+conda_env_exists(){
+    ! conda list --name "${@}" >/dev/null 2>/dev/null
+}
-echo $CONDA_PREFIX
+if conda_env_exists "exllama" ; then
+    echo "Creating virtual environment..."
+    conda env create --name exllama --file $1
+    echo "Virtual environment created."
+else
+    echo "Virtual environment already exists."
+fi
+source activate exllama
 git clone https://github.com/turboderp/exllama $CONDA_PREFIX/exllama && pushd $CONDA_PREFIX/exllama && pip install -r requirements.txt && popd
diff --git a/backend/python/exllama/run.sh b/backend/python/exllama/run.sh
index 116d3e47..95fedb6d 100755
--- a/backend/python/exllama/run.sh
+++ b/backend/python/exllama/run.sh
@@ -2,11 +2,10 @@
 ##
 ## A bash script wrapper that runs the exllama server with conda
-
 export PATH=$PATH:/opt/conda/bin
 # Activate conda environment
-source activate transformers
+source activate exllama
 # get the directory where the bash script is located
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
diff --git a/backend/python/exllama2/backend_pb2.py b/backend/python/exllama2/backend_pb2.py
index be6191b1..b101e4f4 100644
--- a/backend/python/exllama2/backend_pb2.py
+++ b/backend/python/exllama2/backend_pb2.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler.
DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,17 +14,16 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_REPLY']._serialized_start=852 _globals['_REPLY']._serialized_end=876 _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1834 - _globals['_RESULT']._serialized_start=1836 - _globals['_RESULT']._serialized_end=1878 - _globals['_EMBEDDINGRESULT']._serialized_start=1880 - _globals['_EMBEDDINGRESULT']._serialized_end=1917 - _globals['_TRANSCRIPTREQUEST']._serialized_start=1919 - _globals['_TRANSCRIPTREQUEST']._serialized_end=1986 - _globals['_TRANSCRIPTRESULT']._serialized_start=1988 - _globals['_TRANSCRIPTRESULT']._serialized_end=2066 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375 - _globals['_TTSREQUEST']._serialized_start=2377 - _globals['_TTSREQUEST']._serialized_end=2431 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487 - _globals['_MEMORYUSAGEDATA']._serialized_start=2490 - _globals['_MEMORYUSAGEDATA']._serialized_end=2632 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632 - 
_globals['_STATUSRESPONSE']._serialized_start=2635
-  _globals['_STATUSRESPONSE']._serialized_end=2808
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
-  _globals['_BACKEND']._serialized_start=2811
-  _globals['_BACKEND']._serialized_end=3439
+  _globals['_MODELOPTIONS']._serialized_end=1951
+  _globals['_RESULT']._serialized_start=1953
+  _globals['_RESULT']._serialized_end=1995
+  _globals['_EMBEDDINGRESULT']._serialized_start=1997
+  _globals['_EMBEDDINGRESULT']._serialized_end=2034
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
+  _globals['_TTSREQUEST']._serialized_start=2494
+  _globals['_TTSREQUEST']._serialized_end=2548
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2607
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2749
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749
+  _globals['_STATUSRESPONSE']._serialized_start=2752
+  _globals['_STATUSRESPONSE']._serialized_end=2925
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2858
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2925
+  _globals['_BACKEND']._serialized_start=2928
+  _globals['_BACKEND']._serialized_end=3556
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/mamba/backend_pb2.py b/backend/python/mamba/backend_pb2.py
index be6191b1..b101e4f4 100644
--- a/backend/python/mamba/backend_pb2.py
+++ b/backend/python/mamba/backend_pb2.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,17 +14,16 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_REPLY']._serialized_start=852 _globals['_REPLY']._serialized_end=876 _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1834 - _globals['_RESULT']._serialized_start=1836 - _globals['_RESULT']._serialized_end=1878 - _globals['_EMBEDDINGRESULT']._serialized_start=1880 - _globals['_EMBEDDINGRESULT']._serialized_end=1917 - _globals['_TRANSCRIPTREQUEST']._serialized_start=1919 - _globals['_TRANSCRIPTREQUEST']._serialized_end=1986 - _globals['_TRANSCRIPTRESULT']._serialized_start=1988 - _globals['_TRANSCRIPTRESULT']._serialized_end=2066 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375 - _globals['_TTSREQUEST']._serialized_start=2377 - _globals['_TTSREQUEST']._serialized_end=2431 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487 - _globals['_MEMORYUSAGEDATA']._serialized_start=2490 - _globals['_MEMORYUSAGEDATA']._serialized_end=2632 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632 - 
_globals['_STATUSRESPONSE']._serialized_start=2635
-  _globals['_STATUSRESPONSE']._serialized_end=2808
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
-  _globals['_BACKEND']._serialized_start=2811
-  _globals['_BACKEND']._serialized_end=3439
+  _globals['_MODELOPTIONS']._serialized_end=1951
+  _globals['_RESULT']._serialized_start=1953
+  _globals['_RESULT']._serialized_end=1995
+  _globals['_EMBEDDINGRESULT']._serialized_start=1997
+  _globals['_EMBEDDINGRESULT']._serialized_end=2034
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
+  _globals['_TTSREQUEST']._serialized_start=2494
+  _globals['_TTSREQUEST']._serialized_end=2548
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2607
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2749
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749
+  _globals['_STATUSRESPONSE']._serialized_start=2752
+  _globals['_STATUSRESPONSE']._serialized_end=2925
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2858
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2925
+  _globals['_BACKEND']._serialized_start=2928
+  _globals['_BACKEND']._serialized_end=3556
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/petals/backend_pb2.py b/backend/python/petals/backend_pb2.py
index be6191b1..b101e4f4 100644
--- a/backend/python/petals/backend_pb2.py
+++ b/backend/python/petals/backend_pb2.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,17 +14,16 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_REPLY']._serialized_start=852 _globals['_REPLY']._serialized_end=876 _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1834 - _globals['_RESULT']._serialized_start=1836 - _globals['_RESULT']._serialized_end=1878 - _globals['_EMBEDDINGRESULT']._serialized_start=1880 - _globals['_EMBEDDINGRESULT']._serialized_end=1917 - _globals['_TRANSCRIPTREQUEST']._serialized_start=1919 - _globals['_TRANSCRIPTREQUEST']._serialized_end=1986 - _globals['_TRANSCRIPTRESULT']._serialized_start=1988 - _globals['_TRANSCRIPTRESULT']._serialized_end=2066 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375 - _globals['_TTSREQUEST']._serialized_start=2377 - _globals['_TTSREQUEST']._serialized_end=2431 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487 - _globals['_MEMORYUSAGEDATA']._serialized_start=2490 - _globals['_MEMORYUSAGEDATA']._serialized_end=2632 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632 - 
_globals['_STATUSRESPONSE']._serialized_start=2635
-  _globals['_STATUSRESPONSE']._serialized_end=2808
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
-  _globals['_BACKEND']._serialized_start=2811
-  _globals['_BACKEND']._serialized_end=3439
+  _globals['_MODELOPTIONS']._serialized_end=1951
+  _globals['_RESULT']._serialized_start=1953
+  _globals['_RESULT']._serialized_end=1995
+  _globals['_EMBEDDINGRESULT']._serialized_start=1997
+  _globals['_EMBEDDINGRESULT']._serialized_end=2034
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
+  _globals['_TTSREQUEST']._serialized_start=2494
+  _globals['_TTSREQUEST']._serialized_end=2548
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2607
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2749
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749
+  _globals['_STATUSRESPONSE']._serialized_start=2752
+  _globals['_STATUSRESPONSE']._serialized_end=2925
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2858
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2925
+  _globals['_BACKEND']._serialized_start=2928
+  _globals['_BACKEND']._serialized_end=3556
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/sentencetransformers/backend_pb2.py b/backend/python/sentencetransformers/backend_pb2.py
index be6191b1..b101e4f4 100644
--- a/backend/python/sentencetransformers/backend_pb2.py
+++ b/backend/python/sentencetransformers/backend_pb2.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,17 +14,16 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_REPLY']._serialized_start=852 _globals['_REPLY']._serialized_end=876 _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1834 - _globals['_RESULT']._serialized_start=1836 - _globals['_RESULT']._serialized_end=1878 - _globals['_EMBEDDINGRESULT']._serialized_start=1880 - _globals['_EMBEDDINGRESULT']._serialized_end=1917 - _globals['_TRANSCRIPTREQUEST']._serialized_start=1919 - _globals['_TRANSCRIPTREQUEST']._serialized_end=1986 - _globals['_TRANSCRIPTRESULT']._serialized_start=1988 - _globals['_TRANSCRIPTRESULT']._serialized_end=2066 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375 - _globals['_TTSREQUEST']._serialized_start=2377 - _globals['_TTSREQUEST']._serialized_end=2431 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487 - _globals['_MEMORYUSAGEDATA']._serialized_start=2490 - _globals['_MEMORYUSAGEDATA']._serialized_end=2632 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632 - 
_globals['_STATUSRESPONSE']._serialized_start=2635
-  _globals['_STATUSRESPONSE']._serialized_end=2808
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
-  _globals['_BACKEND']._serialized_start=2811
-  _globals['_BACKEND']._serialized_end=3439
+  _globals['_MODELOPTIONS']._serialized_end=1951
+  _globals['_RESULT']._serialized_start=1953
+  _globals['_RESULT']._serialized_end=1995
+  _globals['_EMBEDDINGRESULT']._serialized_start=1997
+  _globals['_EMBEDDINGRESULT']._serialized_end=2034
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
+  _globals['_TTSREQUEST']._serialized_start=2494
+  _globals['_TTSREQUEST']._serialized_end=2548
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2607
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2749
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749
+  _globals['_STATUSRESPONSE']._serialized_start=2752
+  _globals['_STATUSRESPONSE']._serialized_end=2925
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2858
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2925
+  _globals['_BACKEND']._serialized_start=2928
+  _globals['_BACKEND']._serialized_end=3556
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/transformers-musicgen/backend_pb2.py b/backend/python/transformers-musicgen/backend_pb2.py
index be6191b1..b101e4f4 100644
--- a/backend/python/transformers-musicgen/backend_pb2.py
+++ b/backend/python/transformers-musicgen/backend_pb2.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,17 +14,16 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_REPLY']._serialized_start=852 _globals['_REPLY']._serialized_end=876 _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1834 - _globals['_RESULT']._serialized_start=1836 - _globals['_RESULT']._serialized_end=1878 - _globals['_EMBEDDINGRESULT']._serialized_start=1880 - _globals['_EMBEDDINGRESULT']._serialized_end=1917 - _globals['_TRANSCRIPTREQUEST']._serialized_start=1919 - _globals['_TRANSCRIPTREQUEST']._serialized_end=1986 - _globals['_TRANSCRIPTRESULT']._serialized_start=1988 - _globals['_TRANSCRIPTRESULT']._serialized_end=2066 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375 - _globals['_TTSREQUEST']._serialized_start=2377 - _globals['_TTSREQUEST']._serialized_end=2431 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487 - _globals['_MEMORYUSAGEDATA']._serialized_start=2490 - _globals['_MEMORYUSAGEDATA']._serialized_end=2632 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632 - 
_globals['_STATUSRESPONSE']._serialized_start=2635
-  _globals['_STATUSRESPONSE']._serialized_end=2808
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
-  _globals['_BACKEND']._serialized_start=2811
-  _globals['_BACKEND']._serialized_end=3439
+  _globals['_MODELOPTIONS']._serialized_end=1951
+  _globals['_RESULT']._serialized_start=1953
+  _globals['_RESULT']._serialized_end=1995
+  _globals['_EMBEDDINGRESULT']._serialized_start=1997
+  _globals['_EMBEDDINGRESULT']._serialized_end=2034
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
+  _globals['_TTSREQUEST']._serialized_start=2494
+  _globals['_TTSREQUEST']._serialized_end=2548
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2607
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2749
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749
+  _globals['_STATUSRESPONSE']._serialized_start=2752
+  _globals['_STATUSRESPONSE']._serialized_end=2925
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2858
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2925
+  _globals['_BACKEND']._serialized_start=2928
+  _globals['_BACKEND']._serialized_end=3556
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/transformers/backend_pb2.py b/backend/python/transformers/backend_pb2.py
index be6191b1..b101e4f4 100644
--- a/backend/python/transformers/backend_pb2.py
+++ b/backend/python/transformers/backend_pb2.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,17 +14,16 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_REPLY']._serialized_start=852 _globals['_REPLY']._serialized_end=876 _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1834 - _globals['_RESULT']._serialized_start=1836 - _globals['_RESULT']._serialized_end=1878 - _globals['_EMBEDDINGRESULT']._serialized_start=1880 - _globals['_EMBEDDINGRESULT']._serialized_end=1917 - _globals['_TRANSCRIPTREQUEST']._serialized_start=1919 - _globals['_TRANSCRIPTREQUEST']._serialized_end=1986 - _globals['_TRANSCRIPTRESULT']._serialized_start=1988 - _globals['_TRANSCRIPTRESULT']._serialized_end=2066 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375 - _globals['_TTSREQUEST']._serialized_start=2377 - _globals['_TTSREQUEST']._serialized_end=2431 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487 - _globals['_MEMORYUSAGEDATA']._serialized_start=2490 - _globals['_MEMORYUSAGEDATA']._serialized_end=2632 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632 - 
_globals['_STATUSRESPONSE']._serialized_start=2635
-  _globals['_STATUSRESPONSE']._serialized_end=2808
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
-  _globals['_BACKEND']._serialized_start=2811
-  _globals['_BACKEND']._serialized_end=3439
+  _globals['_MODELOPTIONS']._serialized_end=1951
+  _globals['_RESULT']._serialized_start=1953
+  _globals['_RESULT']._serialized_end=1995
+  _globals['_EMBEDDINGRESULT']._serialized_start=1997
+  _globals['_EMBEDDINGRESULT']._serialized_end=2034
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
+  _globals['_TTSREQUEST']._serialized_start=2494
+  _globals['_TTSREQUEST']._serialized_end=2548
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2607
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2749
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749
+  _globals['_STATUSRESPONSE']._serialized_start=2752
+  _globals['_STATUSRESPONSE']._serialized_end=2925
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2858
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2925
+  _globals['_BACKEND']._serialized_start=2928
+  _globals['_BACKEND']._serialized_end=3556
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/vall-e-x/backend_pb2.py b/backend/python/vall-e-x/backend_pb2.py
index be6191b1..b101e4f4 100644
--- a/backend/python/vall-e-x/backend_pb2.py
+++ b/backend/python/vall-e-x/backend_pb2.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,17 +14,16 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_REPLY']._serialized_start=852 _globals['_REPLY']._serialized_end=876 _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1834 - _globals['_RESULT']._serialized_start=1836 - _globals['_RESULT']._serialized_end=1878 - _globals['_EMBEDDINGRESULT']._serialized_start=1880 - _globals['_EMBEDDINGRESULT']._serialized_end=1917 - _globals['_TRANSCRIPTREQUEST']._serialized_start=1919 - _globals['_TRANSCRIPTREQUEST']._serialized_end=1986 - _globals['_TRANSCRIPTRESULT']._serialized_start=1988 - _globals['_TRANSCRIPTRESULT']._serialized_end=2066 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375 - _globals['_TTSREQUEST']._serialized_start=2377 - _globals['_TTSREQUEST']._serialized_end=2431 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487 - _globals['_MEMORYUSAGEDATA']._serialized_start=2490 - _globals['_MEMORYUSAGEDATA']._serialized_end=2632 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632 - 
_globals['_STATUSRESPONSE']._serialized_start=2635
-  _globals['_STATUSRESPONSE']._serialized_end=2808
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
-  _globals['_BACKEND']._serialized_start=2811
-  _globals['_BACKEND']._serialized_end=3439
+  _globals['_MODELOPTIONS']._serialized_end=1951
+  _globals['_RESULT']._serialized_start=1953
+  _globals['_RESULT']._serialized_end=1995
+  _globals['_EMBEDDINGRESULT']._serialized_start=1997
+  _globals['_EMBEDDINGRESULT']._serialized_end=2034
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
+  _globals['_TTSREQUEST']._serialized_start=2494
+  _globals['_TTSREQUEST']._serialized_end=2548
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2607
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2749
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749
+  _globals['_STATUSRESPONSE']._serialized_start=2752
+  _globals['_STATUSRESPONSE']._serialized_end=2925
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2858
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2925
+  _globals['_BACKEND']._serialized_start=2928
+  _globals['_BACKEND']._serialized_end=3556
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/vall-e-x/ttsvalle.yml b/backend/python/vall-e-x/ttsvalle.yml
index 72f232b5..e235bf4e 100644
--- a/backend/python/vall-e-x/ttsvalle.yml
+++ b/backend/python/vall-e-x/ttsvalle.yml
@@ -79,7 +79,7 @@ dependencies:
   - pypinyin==0.49.0
   - python-multipart==0.0.6
   - regex==2023.10.3
-  - safetensors==0.4.0
+  - safetensors>=0.4.0
   - semantic-version==2.10.0
   - soundfile==0.12.1
   - starlette==0.27.0
diff --git a/backend/python/vllm/backend_pb2.py b/backend/python/vllm/backend_pb2.py
index be6191b1..b101e4f4 100644
--- a/backend/python/vllm/backend_pb2.py
+++ b/backend/python/vllm/backend_pb2.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,17 +14,16 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xbb\x07\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -31,31 +31,31 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_REPLY']._serialized_start=852 _globals['_REPLY']._serialized_end=876 _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1834 - _globals['_RESULT']._serialized_start=1836 - _globals['_RESULT']._serialized_end=1878 - _globals['_EMBEDDINGRESULT']._serialized_start=1880 - _globals['_EMBEDDINGRESULT']._serialized_end=1917 - _globals['_TRANSCRIPTREQUEST']._serialized_start=1919 - _globals['_TRANSCRIPTREQUEST']._serialized_end=1986 - _globals['_TRANSCRIPTRESULT']._serialized_start=1988 - _globals['_TRANSCRIPTRESULT']._serialized_end=2066 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2068 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2157 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2160 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2375 - _globals['_TTSREQUEST']._serialized_start=2377 - _globals['_TTSREQUEST']._serialized_end=2431 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2433 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2487 - _globals['_MEMORYUSAGEDATA']._serialized_start=2490 - _globals['_MEMORYUSAGEDATA']._serialized_end=2632 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2584 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2632 - 
_globals['_STATUSRESPONSE']._serialized_start=2635
-  _globals['_STATUSRESPONSE']._serialized_end=2808
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2741
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2808
-  _globals['_BACKEND']._serialized_start=2811
-  _globals['_BACKEND']._serialized_end=3439
+  _globals['_MODELOPTIONS']._serialized_end=1951
+  _globals['_RESULT']._serialized_start=1953
+  _globals['_RESULT']._serialized_end=1995
+  _globals['_EMBEDDINGRESULT']._serialized_start=1997
+  _globals['_EMBEDDINGRESULT']._serialized_end=2034
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
+  _globals['_TTSREQUEST']._serialized_start=2494
+  _globals['_TTSREQUEST']._serialized_end=2548
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2607
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2749
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749
+  _globals['_STATUSRESPONSE']._serialized_start=2752
+  _globals['_STATUSRESPONSE']._serialized_end=2925
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2858
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2925
+  _globals['_BACKEND']._serialized_start=2928
+  _globals['_BACKEND']._serialized_end=3556
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/vllm/backend_vllm.py b/backend/python/vllm/backend_vllm.py
index 8f8c4ee0..ef5134b8 100644
--- a/backend/python/vllm/backend_vllm.py
+++ b/backend/python/vllm/backend_vllm.py
@@ -88,6 +88,16 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         if request.Quantization != "":
             engine_args.quantization = request.Quantization
+        if request.GPUMemoryUtilization != 0:
+            engine_args.gpu_memory_utilization = request.GPUMemoryUtilization
+        if request.TrustRemoteCode:
+            engine_args.trust_remote_code = request.TrustRemoteCode
+        if request.EnforceEager:
+            engine_args.enforce_eager = request.EnforceEager
+        if request.SwapSpace != 0:
+            engine_args.swap_space = request.SwapSpace
+        if request.MaxModelLen != 0:
+            engine_args.max_model_len = request.MaxModelLen
 
         try:
             self.llm = AsyncLLMEngine.from_engine_args(engine_args)
diff --git a/core/backend/options.go b/core/backend/options.go
index 60160572..d2bbb2b8 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -40,38 +40,43 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
 	}
 
 	return &pb.ModelOptions{
-		ContextSize:    int32(c.ContextSize),
-		Seed:           int32(c.Seed),
-		NBatch:         int32(b),
-		NoMulMatQ:      c.NoMulMatQ,
-		CUDA:           c.CUDA, // diffusers, transformers
-		DraftModel:     c.DraftModel,
-		AudioPath:      c.VallE.AudioPath,
-		Quantization:   c.Quantization,
-		MMProj:         c.MMProj,
-		YarnExtFactor:  c.YarnExtFactor,
-		YarnAttnFactor: c.YarnAttnFactor,
-		YarnBetaFast:   c.YarnBetaFast,
-		YarnBetaSlow:   c.YarnBetaSlow,
-		LoraAdapter:    c.LoraAdapter,
-		LoraBase:       c.LoraBase,
-		LoraScale:      c.LoraScale,
-		NGQA:           c.NGQA,
-		RMSNormEps:     c.RMSNormEps,
-		F16Memory:      c.F16,
-		MLock:          c.MMlock,
-		RopeFreqBase:   c.RopeFreqBase,
-		RopeScaling:    c.RopeScaling,
-		Type:           c.ModelType,
-		RopeFreqScale:  c.RopeFreqScale,
-		NUMA:           c.NUMA,
-		Embeddings:     c.Embeddings,
-		LowVRAM:        c.LowVRAM,
-		NGPULayers:     int32(c.NGPULayers),
-		MMap:           c.MMap,
-		MainGPU:        c.MainGPU,
-		Threads:        int32(c.Threads),
-		TensorSplit:    c.TensorSplit,
+		ContextSize:          int32(c.ContextSize),
+		Seed:                 int32(c.Seed),
+		NBatch:               int32(b),
+		NoMulMatQ:            c.NoMulMatQ,
+		CUDA:                 c.CUDA, // diffusers, transformers
+		DraftModel:           c.DraftModel,
+		AudioPath:            c.VallE.AudioPath,
+		Quantization:         c.Quantization,
+		GPUMemoryUtilization: c.GPUMemoryUtilization,
+		TrustRemoteCode:      c.TrustRemoteCode,
+		EnforceEager:         c.EnforceEager,
+		SwapSpace:            int32(c.SwapSpace),
+		MaxModelLen:          int32(c.MaxModelLen),
+		MMProj:               c.MMProj,
+		YarnExtFactor:        c.YarnExtFactor,
+		YarnAttnFactor:       c.YarnAttnFactor,
+		YarnBetaFast:         c.YarnBetaFast,
+		YarnBetaSlow:         c.YarnBetaSlow,
+		LoraAdapter:          c.LoraAdapter,
+		LoraBase:             c.LoraBase,
+		LoraScale:            c.LoraScale,
+		NGQA:                 c.NGQA,
+		RMSNormEps:           c.RMSNormEps,
+		F16Memory:            c.F16,
+		MLock:                c.MMlock,
+		RopeFreqBase:         c.RopeFreqBase,
+		RopeScaling:          c.RopeScaling,
+		Type:                 c.ModelType,
+		RopeFreqScale:        c.RopeFreqScale,
+		NUMA:                 c.NUMA,
+		Embeddings:           c.Embeddings,
+		LowVRAM:              c.LowVRAM,
+		NGPULayers:           int32(c.NGPULayers),
+		MMap:                 c.MMap,
+		MainGPU:              c.MainGPU,
+		Threads:              int32(c.Threads),
+		TensorSplit:          c.TensorSplit,
 		// AutoGPTQ
 		ModelBaseName: c.AutoGPTQ.ModelBaseName,
 		Device:        c.AutoGPTQ.Device,
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index 3098da86..63e5855c 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -118,16 +118,21 @@ type LLMConfig struct {
 	TrimSpace  []string `yaml:"trimspace"`
 	TrimSuffix []string `yaml:"trimsuffix"`
 
-	ContextSize  int     `yaml:"context_size"`
-	NUMA         bool    `yaml:"numa"`
-	LoraAdapter  string  `yaml:"lora_adapter"`
-	LoraBase     string  `yaml:"lora_base"`
-	LoraScale    float32 `yaml:"lora_scale"`
-	NoMulMatQ    bool    `yaml:"no_mulmatq"`
-	DraftModel   string  `yaml:"draft_model"`
-	NDraft       int32   `yaml:"n_draft"`
-	Quantization string  `yaml:"quantization"`
-	MMProj       string  `yaml:"mmproj"`
+	ContextSize          int     `yaml:"context_size"`
+	NUMA                 bool    `yaml:"numa"`
+	LoraAdapter          string  `yaml:"lora_adapter"`
+	LoraBase             string  `yaml:"lora_base"`
+	LoraScale            float32 `yaml:"lora_scale"`
+	NoMulMatQ            bool    `yaml:"no_mulmatq"`
+	DraftModel           string  `yaml:"draft_model"`
+	NDraft               int32   `yaml:"n_draft"`
+	Quantization         string  `yaml:"quantization"`
+	GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
+	TrustRemoteCode      bool    `yaml:"trust_remote_code"`      // vLLM
+	EnforceEager         bool    `yaml:"enforce_eager"`          // vLLM
+	SwapSpace            int     `yaml:"swap_space"`             // vLLM
+	MaxModelLen          int     `yaml:"max_model_len"`          // vLLM
+	MMProj               string  `yaml:"mmproj"`
 
 	RopeScaling string `yaml:"rope_scaling"`
 	ModelType   string `yaml:"type"`
diff --git a/docs/content/docs/features/text-generation.md b/docs/content/docs/features/text-generation.md
index bc512060..121f90a8 100644
--- a/docs/content/docs/features/text-generation.md
+++ b/docs/content/docs/features/text-generation.md
@@ -245,8 +245,18 @@ backend: vllm
 parameters:
     model: "facebook/opt-125m"
 
-# Decomment to specify a quantization method (optional)
+# Uncomment to specify a quantization method (optional)
 # quantization: "awq"
+# Uncomment to limit the GPU memory utilization (vLLM default is 0.9 for 90%)
+# gpu_memory_utilization: 0.5
+# Uncomment to trust remote code from huggingface
+# trust_remote_code: true
+# Uncomment to enable eager execution
+# enforce_eager: true
+# Uncomment to specify the size of the CPU swap space per GPU (in GiB)
+# swap_space: 2
+# Uncomment to specify the maximum length of a sequence (including prompt and output)
+# max_model_len: 32768
 ```
 
 The backend will automatically download the required files in order to run the model.
 
diff --git a/pkg/grpc/proto/backend.pb.go b/pkg/grpc/proto/backend.pb.go
index acf9f375..1268f2cc 100644
--- a/pkg/grpc/proto/backend.pb.go
+++ b/pkg/grpc/proto/backend.pb.go
@@ -1,7 +1,7 @@
 // Code generated by protoc-gen-go. DO NOT EDIT.
 // versions:
 // 	protoc-gen-go v1.26.0
-// 	protoc        v4.23.4
+// 	protoc        v4.25.3
 // source: backend.proto
 
 package proto
@@ -576,14 +576,19 @@ type ModelOptions struct {
 	DraftModel string `protobuf:"bytes,39,opt,name=DraftModel,proto3" json:"DraftModel,omitempty"`
 	AudioPath  string `protobuf:"bytes,38,opt,name=AudioPath,proto3" json:"AudioPath,omitempty"`
 	// vllm
-	Quantization   string  `protobuf:"bytes,40,opt,name=Quantization,proto3" json:"Quantization,omitempty"`
-	MMProj         string  `protobuf:"bytes,41,opt,name=MMProj,proto3" json:"MMProj,omitempty"`
-	RopeScaling    string  `protobuf:"bytes,43,opt,name=RopeScaling,proto3" json:"RopeScaling,omitempty"`
-	YarnExtFactor  float32 `protobuf:"fixed32,44,opt,name=YarnExtFactor,proto3" json:"YarnExtFactor,omitempty"`
-	YarnAttnFactor float32 `protobuf:"fixed32,45,opt,name=YarnAttnFactor,proto3" json:"YarnAttnFactor,omitempty"`
-	YarnBetaFast   float32 `protobuf:"fixed32,46,opt,name=YarnBetaFast,proto3" json:"YarnBetaFast,omitempty"`
-	YarnBetaSlow   float32 `protobuf:"fixed32,47,opt,name=YarnBetaSlow,proto3" json:"YarnBetaSlow,omitempty"`
-	Type           string  `protobuf:"bytes,49,opt,name=Type,proto3" json:"Type,omitempty"`
+	Quantization         string  `protobuf:"bytes,40,opt,name=Quantization,proto3" json:"Quantization,omitempty"`
+	GPUMemoryUtilization float32 `protobuf:"fixed32,50,opt,name=GPUMemoryUtilization,proto3" json:"GPUMemoryUtilization,omitempty"`
+	TrustRemoteCode      bool    `protobuf:"varint,51,opt,name=TrustRemoteCode,proto3" json:"TrustRemoteCode,omitempty"`
+	EnforceEager         bool    `protobuf:"varint,52,opt,name=EnforceEager,proto3" json:"EnforceEager,omitempty"`
+	SwapSpace            int32   `protobuf:"varint,53,opt,name=SwapSpace,proto3" json:"SwapSpace,omitempty"`
+	MaxModelLen          int32   `protobuf:"varint,54,opt,name=MaxModelLen,proto3" json:"MaxModelLen,omitempty"`
+	MMProj               string  `protobuf:"bytes,41,opt,name=MMProj,proto3" json:"MMProj,omitempty"`
+	RopeScaling          string  `protobuf:"bytes,43,opt,name=RopeScaling,proto3" json:"RopeScaling,omitempty"`
+	YarnExtFactor        float32 `protobuf:"fixed32,44,opt,name=YarnExtFactor,proto3" json:"YarnExtFactor,omitempty"`
+	YarnAttnFactor       float32 `protobuf:"fixed32,45,opt,name=YarnAttnFactor,proto3" json:"YarnAttnFactor,omitempty"`
+	YarnBetaFast         float32 `protobuf:"fixed32,46,opt,name=YarnBetaFast,proto3" json:"YarnBetaFast,omitempty"`
+	YarnBetaSlow         float32 `protobuf:"fixed32,47,opt,name=YarnBetaSlow,proto3" json:"YarnBetaSlow,omitempty"`
+	Type                 string  `protobuf:"bytes,49,opt,name=Type,proto3" json:"Type,omitempty"`
 }
 
 func (x *ModelOptions) Reset() {
@@ -912,6 +917,41 @@ func (x *ModelOptions) GetQuantization() string {
 	return ""
 }
 
+func (x *ModelOptions) GetGPUMemoryUtilization() float32 {
+	if x != nil {
+		return x.GPUMemoryUtilization
+	}
+	return 0
+}
+
+func (x *ModelOptions) GetTrustRemoteCode() bool {
+	if x != nil {
+		return x.TrustRemoteCode
+	}
+	return false
+}
+
+func (x *ModelOptions) GetEnforceEager() bool {
+	if x != nil {
+		return x.EnforceEager
+	}
+	return false
+}
+
+func (x *ModelOptions) GetSwapSpace() int32 {
+	if x != nil {
+		return x.SwapSpace
+	}
+	return 0
+}
+
+func (x *ModelOptions)
GetMaxModelLen() int32 { + if x != nil { + return x.MaxModelLen + } + return 0 +} + func (x *ModelOptions) GetMMProj() string { if x != nil { return x.MMProj @@ -1703,7 +1743,7 @@ var file_backend_proto_rawDesc = []byte{ 0x65, 0x73, 0x18, 0x2a, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, - 0x61, 0x67, 0x65, 0x22, 0xe0, 0x0b, 0x0a, 0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, + 0x61, 0x67, 0x65, 0x22, 0xa2, 0x0d, 0x0a, 0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, @@ -1783,145 +1823,157 @@ var file_backend_proto_rawDesc = []byte{ 0x01, 0x28, 0x09, 0x52, 0x09, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61, 0x74, 0x69, - 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x4d, 0x4d, 0x50, 0x72, 0x6f, 0x6a, 0x18, 0x29, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x06, 0x4d, 0x4d, 0x50, 0x72, 0x6f, 0x6a, 0x12, 0x20, 0x0a, 0x0b, 0x52, 0x6f, - 0x70, 0x65, 0x53, 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x18, 0x2b, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x12, 0x24, 0x0a, 0x0d, - 0x59, 0x61, 0x72, 0x6e, 0x45, 0x78, 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2c, 0x20, - 0x01, 0x28, 0x02, 0x52, 0x0d, 0x59, 0x61, 0x72, 0x6e, 0x45, 0x78, 0x74, 0x46, 0x61, 0x63, 0x74, - 0x6f, 0x72, 0x12, 0x26, 0x0a, 0x0e, 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, - 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2d, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0e, 0x59, 0x61, 0x72, 0x6e, - 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, - 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x46, 0x61, 0x73, 0x74, 0x18, 0x2e, 0x20, 0x01, 0x28, 0x02, - 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x46, 0x61, 0x73, 0x74, 0x12, 0x22, - 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x18, 0x2f, - 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, - 0x6f, 0x77, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, 0x18, 0x31, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x04, 0x54, 0x79, 0x70, 0x65, 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, - 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, - 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, - 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, - 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, - 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d, 0x62, - 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x5b, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, - 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 
0x75, 0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, - 0x64, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x1a, - 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x68, - 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x68, 0x72, - 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e, 0x0a, 0x10, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, - 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x36, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, - 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x62, 0x61, 0x63, - 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, - 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, - 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, - 0x74, 0x65, 0x78, 0x74, 0x22, 0x77, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, - 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, - 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, - 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x65, 0x6e, - 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, - 0x05, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xbe, 0x02, - 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x12, 0x14, - 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x77, - 0x69, 0x64, 0x74, 0x68, 0x12, 0x12, 0x0a, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x74, 0x65, 0x70, - 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x74, 0x65, 0x70, 0x12, 0x12, 0x0a, 0x04, - 0x73, 0x65, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x65, 0x65, 0x64, - 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, - 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x73, 0x69, 0x74, - 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x6e, 0x65, 0x67, - 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x07, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x0e, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, - 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x03, 0x64, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x72, 0x63, 0x18, 0x09, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x03, 0x73, 0x72, 0x63, 0x12, 0x2a, 0x0a, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, - 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 
0x6d, 0x65, 0x74, 0x65, - 0x72, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x0b, - 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x22, 0x48, - 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, - 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, - 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x22, 0x46, 0x0a, 0x14, 0x54, 0x6f, 0x6b, 0x65, - 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, - 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, - 0x52, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, - 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, - 0x22, 0xac, 0x01, 0x0a, 0x0f, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, - 0x44, 0x61, 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x45, 0x0a, 0x09, 0x62, 0x72, - 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, - 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x2e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, - 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, - 0x6e, 0x1a, 0x3c, 0x0a, 0x0e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e, - 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, - 0xbc, 0x01, 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, - 0x73, 0x65, 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x0e, 0x32, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, - 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x30, 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, - 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, - 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, - 0x61, 0x52, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x22, 0x43, 0x0a, 0x05, 0x53, 0x74, 0x61, - 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, - 0x5a, 0x45, 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x01, 0x12, - 0x09, 0x0a, 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, 0x10, 0x02, 0x12, 0x12, 0x0a, 0x05, 0x45, 0x52, - 0x52, 0x4f, 0x52, 0x10, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x32, 0xf4, - 0x04, 0x0a, 0x07, 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, - 0x61, 0x6c, 0x74, 0x68, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, - 
0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0e, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x34, - 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, - 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, - 0x6c, 0x79, 0x22, 0x00, 0x12, 0x35, 0x0a, 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, - 0x6c, 0x12, 0x15, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, - 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, - 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, - 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x30, 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, - 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, - 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, - 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, - 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, - 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, - 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, - 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, - 0x0a, 0x12, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, - 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, - 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, - 0x03, 0x54, 0x54, 0x53, 0x12, 0x13, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, - 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, - 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4a, 0x0a, 0x0e, - 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x17, + 0x6f, 0x6e, 0x12, 0x32, 0x0a, 0x14, 0x47, 0x50, 0x55, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, + 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x32, 0x20, 0x01, 0x28, 0x02, + 0x52, 0x14, 0x47, 0x50, 0x55, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, + 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x28, 0x0a, 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, + 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x18, 0x33, 0x20, 0x01, 0x28, 0x08, 0x52, + 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, + 0x12, 0x22, 0x0a, 0x0c, 
0x45, 0x6e, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, + 0x18, 0x34, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x45, 0x6e, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x45, + 0x61, 0x67, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x53, 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, + 0x65, 0x18, 0x35, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x53, 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, + 0x63, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x61, 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, + 0x6e, 0x18, 0x36, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x4d, 0x61, 0x78, 0x4d, 0x6f, 0x64, 0x65, + 0x6c, 0x4c, 0x65, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x4d, 0x4d, 0x50, 0x72, 0x6f, 0x6a, 0x18, 0x29, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x4d, 0x4d, 0x50, 0x72, 0x6f, 0x6a, 0x12, 0x20, 0x0a, 0x0b, + 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x18, 0x2b, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x12, 0x24, + 0x0a, 0x0d, 0x59, 0x61, 0x72, 0x6e, 0x45, 0x78, 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, + 0x2c, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x59, 0x61, 0x72, 0x6e, 0x45, 0x78, 0x74, 0x46, 0x61, + 0x63, 0x74, 0x6f, 0x72, 0x12, 0x26, 0x0a, 0x0e, 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, + 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2d, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0e, 0x59, 0x61, + 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x22, 0x0a, 0x0c, + 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x46, 0x61, 0x73, 0x74, 0x18, 0x2e, 0x20, 0x01, + 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x46, 0x61, 0x73, 0x74, + 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, + 0x18, 0x2f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, + 0x53, 0x6c, 0x6f, 0x77, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, 0x18, 0x31, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x04, 0x54, 0x79, 0x70, 0x65, 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, + 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, + 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, + 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, + 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62, + 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65, + 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x5b, 0x0a, 0x11, 0x54, 0x72, 0x61, + 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x10, + 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, + 0x12, 0x1a, 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, + 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74, + 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e, 0x0a, 0x10, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, + 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x36, 0x0a, 0x08, 0x73, 0x65, + 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x62, + 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 
0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, + 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, + 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x22, 0x77, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, + 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, + 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, + 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, + 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, + 0x65, 0x6e, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, + 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, + 0xbe, 0x02, 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, + 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x68, 0x65, 0x69, 0x67, + 0x68, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, + 0x12, 0x14, 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x12, 0x12, 0x0a, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x74, + 0x65, 0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x74, 0x65, 0x70, 0x12, 0x12, + 0x0a, 0x04, 0x73, 0x65, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x65, + 0x65, 0x64, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, + 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x73, + 0x69, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x6e, + 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x07, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, + 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x72, 0x63, 0x18, 0x09, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x72, 0x63, 0x12, 0x2a, 0x0a, 0x10, 0x45, 0x6e, 0x61, 0x62, + 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x18, 0x0a, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, + 0x74, 0x65, 0x72, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, + 0x18, 0x0b, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, + 0x22, 0x48, 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, + 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, + 0x78, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, + 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x22, 0x46, 0x0a, 0x14, 0x54, 0x6f, + 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 
0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, + 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, + 0x6e, 0x73, 0x22, 0xac, 0x01, 0x0a, 0x0f, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, + 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x45, 0x0a, 0x09, + 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x27, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, + 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x2e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, + 0x6f, 0x77, 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, + 0x6f, 0x77, 0x6e, 0x1a, 0x3c, 0x0a, 0x0e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, + 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, + 0x01, 0x22, 0xbc, 0x01, 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x0e, 0x32, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, + 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x30, 0x0a, 0x06, 0x6d, 0x65, 0x6d, + 0x6f, 0x72, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, + 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, + 0x61, 0x74, 0x61, 0x52, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x22, 0x43, 0x0a, 0x05, 0x53, + 0x74, 0x61, 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, + 0x4c, 0x49, 0x5a, 0x45, 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, + 0x01, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, 0x10, 0x02, 0x12, 0x12, 0x0a, 0x05, + 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, + 0x32, 0xf4, 0x04, 0x0a, 0x07, 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, + 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, + 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0e, + 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, + 0x12, 0x34, 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, + 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, + 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x35, 0x0a, 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, + 0x64, 0x65, 0x6c, 0x12, 0x15, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, + 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, + 
0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, + 0x0d, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, - 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, - 0x64, 0x2e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, - 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x3b, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, - 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x17, 0x2e, 0x62, 0x61, 0x63, - 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, - 0x6e, 0x73, 0x65, 0x22, 0x00, 0x42, 0x5a, 0x0a, 0x19, 0x69, 0x6f, 0x2e, 0x73, 0x6b, 0x79, 0x6e, - 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x42, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, - 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, 0x61, 0x6c, - 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, + 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x30, 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, + 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, + 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, + 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, + 0x0d, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, + 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, + 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, + 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, + 0x12, 0x4d, 0x0a, 0x12, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, + 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, + 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, + 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, + 0x2d, 0x0a, 0x03, 0x54, 0x54, 0x53, 0x12, 0x13, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, + 0x2e, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, + 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4a, + 0x0a, 0x0e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, + 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, + 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x1d, 
0x2e, 0x62, 0x61, 0x63, 0x6b, + 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, + 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x3b, 0x0a, 0x06, 0x53, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, + 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x17, 0x2e, 0x62, + 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, + 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x42, 0x5a, 0x0a, 0x19, 0x69, 0x6f, 0x2e, 0x73, 0x6b, + 0x79, 0x6e, 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, 0x62, 0x61, 0x63, + 0x6b, 0x65, 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x42, 0x61, 0x63, + 0x6b, 0x65, 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, + 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, + 0x61, 0x6c, 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/pkg/grpc/proto/backend_grpc.pb.go b/pkg/grpc/proto/backend_grpc.pb.go index ef5187bc..89552e36 100644 --- a/pkg/grpc/proto/backend_grpc.pb.go +++ b/pkg/grpc/proto/backend_grpc.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: // - protoc-gen-go-grpc v1.2.0 -// - protoc v4.23.4 +// - protoc v4.25.3 // source: backend.proto package proto From daa0b8741cd1792053319796cdcbbc5d0ad3e44a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 1 Mar 2024 23:38:24 +0100 Subject: [PATCH 0111/2895] :arrow_up: Update ggerganov/llama.cpp (#1785) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a52774cd..35e1a80e 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=d5ab29757ebc59a30f03e408294ec20628a6374e +CPPLLAMA_VERSION?=c29af7e2252d288f2ea58a7d437c1cb7c0abf160 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From a0eeb749579904267ead1b392ebba33e5607e24a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 2 Mar 2024 09:35:40 +0100 Subject: [PATCH 0112/2895] Update hot topics/roadmap Signed-off-by: Ettore Di Giacinto --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 43c534ac..e9460e79 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ - Tools API support: https://github.com/mudler/LocalAI/pull/1715 - LLaVa 1.6: https://github.com/mudler/LocalAI/pull/1714 - ROCm container images: https://github.com/mudler/LocalAI/pull/1595 -- Intel GPU support (sycl): https://github.com/mudler/LocalAI/issues/1653 +- Intel GPU support (sycl, transformers, diffusers): https://github.com/mudler/LocalAI/issues/1653 - Deprecation of old backends: https://github.com/mudler/LocalAI/issues/1651 - Mamba support: https://github.com/mudler/LocalAI/pull/1589 - Start and share models with config file: https://github.com/mudler/LocalAI/pull/1522 @@ -60,7 +60,8 @@ Hot topics (looking for contributors): - Improving UX v2: https://github.com/mudler/LocalAI/issues/1373 - Assistant API: https://github.com/mudler/LocalAI/issues/1273 - 
Moderation endpoint: https://github.com/mudler/LocalAI/issues/999 - +- Vulkan: https://github.com/mudler/LocalAI/issues/1647 + If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22 ## 💻 [Getting started](https://localai.io/basics/getting_started/index.html) From b60a3fc8797ff9b37faf0ad27610b9cc819f6d16 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 3 Mar 2024 08:49:23 +0100 Subject: [PATCH 0113/2895] :arrow_up: Update ggerganov/llama.cpp (#1789) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 35e1a80e..683b62ce 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=c29af7e2252d288f2ea58a7d437c1cb7c0abf160 +CPPLLAMA_VERSION?=4a6e2d6142ab815c964924896891e9ab3e050632 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 4ab72146cd3e1b17455d3c5af5b3316e58e5ee47 Mon Sep 17 00:00:00 2001 From: Steven Christou <1302212+christ66@users.noreply.github.com> Date: Sat, 2 Mar 2024 23:50:43 -0800 Subject: [PATCH 0114/2895] feat(assistant): Initial implementation of assistants api (#1761) Initial implementation of assistants api --- api/openai/assistant.go | 470 ++++++++++++++++++++++++++++++++++++++++ core/http/api.go | 20 ++ 2 files changed, 490 insertions(+) create mode 100644 api/openai/assistant.go diff --git a/api/openai/assistant.go b/api/openai/assistant.go new file mode 100644 index 00000000..58bb52f1 --- /dev/null +++ b/api/openai/assistant.go @@ -0,0 +1,470 @@ +package openai + +import ( + "fmt" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/options" + "github.com/gofiber/fiber/v2" + "github.com/google/uuid" + "github.com/rs/zerolog/log" + "net/http" + "sort" + "strconv" + "strings" + "time" +) + +// ToolType defines a type for tool options +type ToolType string + +const ( + CodeInterpreter ToolType = "code_interpreter" + Retrieval ToolType = "retrieval" + Function ToolType = "function" + + MaxCharacterInstructions = 32768 + MaxCharacterDescription = 512 + MaxCharacterName = 256 + MaxToolsSize = 128 + MaxFileIdSize = 20 + MaxCharacterMetadataKey = 64 + MaxCharacterMetadataValue = 512 + + MaxLengthRandomID = 0 +) + +type Tool struct { + Type ToolType `json:"type"` +} + +// Assistant represents the structure of an assistant object from the OpenAI API. +type Assistant struct { + ID string `json:"id"` // The unique identifier of the assistant. + Object string `json:"object"` // Object type, which is "assistant". + Created int64 `json:"created"` // The time at which the assistant was created. + Model string `json:"model"` // The model ID used by the assistant. + Name string `json:"name,omitempty"` // The name of the assistant. + Description string `json:"description,omitempty"` // The description of the assistant. + Instructions string `json:"instructions,omitempty"` // The system instructions that the assistant uses. + Tools []Tool `json:"tools,omitempty"` // A list of tools enabled on the assistant. + FileIDs []string `json:"file_ids,omitempty"` // A list of file IDs attached to this assistant. + Metadata map[string]string `json:"metadata,omitempty"` // Set of key-value pairs attached to the assistant. 
+} + +var ( + assistants = []Assistant{} // better to return empty array instead of "null" +) + +type AssistantRequest struct { + Model string `json:"model"` + Name string `json:"name,omitempty"` + Description string `json:"description,omitempty"` + Instructions string `json:"instructions,omitempty"` + Tools []Tool `json:"tools,omitempty"` + FileIDs []string `json:"file_ids,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` +} + +func CreateAssistantEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + request := new(AssistantRequest) + if err := c.BodyParser(request); err != nil { + log.Warn().AnErr("Unable to parse AssistantRequest", err) + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"}) + } + + if !modelExists(o, request.Model) { + log.Warn().Msgf("Model: %s was not found in list of models.", request.Model) + return c.Status(fiber.StatusBadRequest).SendString("Model " + request.Model + " not found") + } + + assistant := Assistant{ + ID: "asst_" + generateRandomID(MaxLengthRandomID), + Object: "assistant", + Created: time.Now().Unix(), + Model: request.Model, + Name: request.Name, + Description: request.Description, + Instructions: request.Instructions, + Tools: request.Tools, + FileIDs: request.FileIDs, + Metadata: request.Metadata, + } + + assistants = append(assistants, assistant) + + return c.Status(fiber.StatusOK).JSON(assistant) + } +} + +func generateRandomID(maxLength int) string { + newUUID, err := uuid.NewUUID() + if err != nil { + log.Error().Msgf("Failed to generate UUID: %v", err) + return "" + } + + uuidStr := newUUID.String() + if maxLength > 0 && len(uuidStr) > maxLength { + return uuidStr[:maxLength] + } + return uuidStr +} + +func ListAssistantsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + // Parse query parameters + limitQuery := c.Query("limit", "20") + orderQuery := c.Query("order", "desc") + afterQuery := c.Query("after") + beforeQuery := c.Query("before") + + // Convert string limit to integer + limit, err := strconv.Atoi(limitQuery) + if err != nil { + return c.Status(http.StatusBadRequest).SendString(err.Error()) + } + + // Sort assistants + sort.SliceStable(assistants, func(i, j int) bool { + if orderQuery == "asc" { + return assistants[i].Created < assistants[j].Created + } + return assistants[i].Created > assistants[j].Created + }) + + // After and before cursors + if afterQuery != "" { + assistants = filterAssistantsAfterID(assistants, afterQuery) + } + if beforeQuery != "" { + assistants = filterAssistantsBeforeID(assistants, beforeQuery) + } + + // Apply limit + if limit < len(assistants) { + assistants = assistants[:limit] + } + + return c.JSON(assistants) + } +} + +// FilterAssistantsBeforeID filters out those assistants whose ID comes before the given ID +// We assume that the assistants are already sorted +func filterAssistantsBeforeID(assistants []Assistant, id string) []Assistant { + for i, assistant := range assistants { + if strings.Compare(assistant.ID, id) == 0 { + if i != 0 { + return assistants[:i] + } + return []Assistant{} + } + } + return assistants +} + +// FilterAssistantsAfterID filters out those assistants whose ID comes after the given ID +// We assume that the assistants are already sorted +func filterAssistantsAfterID(assistants []Assistant, id string) []Assistant { + for i, assistant := range assistants { + if strings.Compare(assistant.ID, id) == 
0 { + if i != len(assistants)-1 { + return assistants[i+1:] + } + return []Assistant{} + } + } + return assistants +} + +func modelExists(o *options.Option, modelName string) (found bool) { + found = false + models, err := o.Loader.ListModels() + if err != nil { + return + } + + for _, model := range models { + if model == modelName { + found = true + return + } + } + return +} + +func DeleteAssistantEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { + type DeleteAssistantResponse struct { + ID string `json:"id"` + Object string `json:"object"` + Deleted bool `json:"deleted"` + } + + return func(c *fiber.Ctx) error { + assistantID := c.Params("assistant_id") + if assistantID == "" { + return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required") + } + + for i, assistant := range assistants { + if assistant.ID == assistantID { + assistants = append(assistants[:i], assistants[i+1:]...) + return c.Status(fiber.StatusOK).JSON(DeleteAssistantResponse{ + ID: assistantID, + Object: "assistant.deleted", + Deleted: true, + }) + } + } + + log.Warn().Msgf("Unable to find assistant %s for deletion", assistantID) + return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantResponse{ + ID: assistantID, + Object: "assistant.deleted", + Deleted: false, + }) + } +} + +func GetAssistantEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + assistantID := c.Params("assistant_id") + if assistantID == "" { + return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required") + } + + for _, assistant := range assistants { + if assistant.ID == assistantID { + return c.Status(fiber.StatusOK).JSON(assistant) + } + } + + return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant with id: %s", assistantID)) + } +} + +type AssistantFile struct { + ID string `json:"id"` + Object string `json:"object"` + CreatedAt int64 `json:"created_at"` + AssistantID string `json:"assistant_id"` +} + +var assistantFiles []AssistantFile + +func CreateAssistantFileEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { + type AssistantFileRequest struct { + FileID string `json:"file_id"` + } + + return func(c *fiber.Ctx) error { + request := new(AssistantFileRequest) + if err := c.BodyParser(request); err != nil { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"}) + } + + assistantID := c.Query("assistant_id") + if assistantID == "" { + return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required") + } + + for _, assistant := range assistants { + if assistant.ID == assistantID { + if len(assistant.FileIDs) > MaxFileIdSize { + return c.Status(fiber.StatusBadRequest).SendString(fmt.Sprintf("Max files %d for assistant %s reached.", MaxFileIdSize, assistant.Name)) + } + + for _, file := range uploadedFiles { + if file.ID == request.FileID { + assistant.FileIDs = append(assistant.FileIDs, request.FileID) + assistantFile := AssistantFile{ + ID: file.ID, + Object: "assistant.file", + CreatedAt: time.Now().Unix(), + AssistantID: assistant.ID, + } + assistantFiles = append(assistantFiles, assistantFile) + return c.Status(fiber.StatusOK).JSON(assistantFile) + } + } + + return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find file_id: %s", request.FileID)) + } + } + + return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find ")) + } +} + +func 
ListAssistantFilesEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { + type ListAssistantFiles struct { + Data []File + Object string + } + + return func(c *fiber.Ctx) error { + assistantID := c.Params("assistant_id") + if assistantID == "" { + return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required") + } + + limitQuery := c.Query("limit", "20") + order := c.Query("order", "desc") + limit, err := strconv.Atoi(limitQuery) + if err != nil || limit < 1 || limit > 100 { + limit = 20 // Default to 20 if there's an error or the limit is out of bounds + } + + // Sort files by CreatedAt depending on the order query parameter + if order == "asc" { + sort.Slice(assistantFiles, func(i, j int) bool { + return assistantFiles[i].CreatedAt < assistantFiles[j].CreatedAt + }) + } else { // default to "desc" + sort.Slice(assistantFiles, func(i, j int) bool { + return assistantFiles[i].CreatedAt > assistantFiles[j].CreatedAt + }) + } + + // Limit the number of files returned + var limitedFiles []AssistantFile + hasMore := false + if len(assistantFiles) > limit { + hasMore = true + limitedFiles = assistantFiles[:limit] + } else { + limitedFiles = assistantFiles + } + + response := map[string]interface{}{ + "object": "list", + "data": limitedFiles, + "first_id": func() string { + if len(limitedFiles) > 0 { + return limitedFiles[0].ID + } + return "" + }(), + "last_id": func() string { + if len(limitedFiles) > 0 { + return limitedFiles[len(limitedFiles)-1].ID + } + return "" + }(), + "has_more": hasMore, + } + + return c.Status(fiber.StatusOK).JSON(response) + } +} + +func ModifyAssistantEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + request := new(AssistantRequest) + if err := c.BodyParser(request); err != nil { + log.Warn().AnErr("Unable to parse AssistantRequest", err) + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"}) + } + + assistantID := c.Params("assistant_id") + if assistantID == "" { + return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required") + } + + for i, assistant := range assistants { + if assistant.ID == assistantID { + newAssistant := Assistant{ + ID: assistantID, + Object: assistant.Object, + Created: assistant.Created, + Model: request.Model, + Name: request.Name, + Description: request.Description, + Instructions: request.Instructions, + Tools: request.Tools, + FileIDs: request.FileIDs, // todo: should probably verify fileids exist + Metadata: request.Metadata, + } + + // Remove old one and replace with new one + assistants = append(assistants[:i], assistants[i+1:]...) 
+ assistants = append(assistants, newAssistant) + return c.Status(fiber.StatusOK).JSON(newAssistant) + } + } + return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant with id: %s", assistantID)) + } +} + +func DeleteAssistantFileEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { + type DeleteAssistantFileResponse struct { + ID string `json:"id"` + Object string `json:"object"` + Deleted bool `json:"deleted"` + } + return func(c *fiber.Ctx) error { + assistantID := c.Params("assistant_id") + fileId := c.Params("file_id") + if assistantID == "" { + return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id and file_id are required") + } + // First remove file from assistant + for i, assistant := range assistants { + if assistant.ID == assistantID { + for j, fileId := range assistant.FileIDs { + if fileId == fileId { + assistants[i].FileIDs = append(assistants[i].FileIDs[:j], assistants[i].FileIDs[j+1:]...) + + // Check if the file exists in the assistantFiles slice + for i, assistantFile := range assistantFiles { + if assistantFile.ID == fileId { + // Remove the file from the assistantFiles slice + assistantFiles = append(assistantFiles[:i], assistantFiles[i+1:]...) + return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{ + ID: fileId, + Object: "assistant.file.deleted", + Deleted: true, + }) + } + } + } + } + + log.Warn().Msgf("Unable to locate file_id: %s in assistants: %s", fileId, assistantID) + return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantFileResponse{ + ID: fileId, + Object: "assistant.file.deleted", + Deleted: false, + }) + } + } + log.Warn().Msgf("Unable to find assistant: %s", assistantID) + + return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantFileResponse{ + ID: fileId, + Object: "assistant.file.deleted", + Deleted: false, + }) + } +} + +func GetAssistantFileEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + assistantID := c.Params("assistant_id") + fileId := c.Params("file_id") + if assistantID == "" { + return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id and file_id are required") + } + + for _, assistantFile := range assistantFiles { + if assistantFile.AssistantID == assistantID { + if assistantFile.ID == fileId { + return c.Status(fiber.StatusOK).JSON(assistantFile) + } + return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant file with file_id: %s", fileId)) + } + } + return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant file with assistant_id: %s", assistantID)) + } +} diff --git a/core/http/api.go b/core/http/api.go index e2646a14..c010b6ca 100644 --- a/core/http/api.go +++ b/core/http/api.go @@ -157,6 +157,26 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) + // assistant + app.Get("/v1/assistants", openai.ListAssistantsEndpoint(cl, options)) + app.Get("/assistants", openai.ListAssistantsEndpoint(cl, options)) + app.Post("/v1/assistants", openai.CreateAssistantEndpoint(cl, options)) + app.Post("/assistants", openai.CreateAssistantEndpoint(cl, options)) + app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, options)) + app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, options)) + 
app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, options)) + app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, options)) + app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, options)) + app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, options)) + app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, options)) + app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, options)) + app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, options)) + app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, options)) + app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, options)) + app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, options)) + app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, options)) + app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, options)) + // files app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) From 006511ee254e73206243c15406cd16d739df5268 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 3 Mar 2024 10:31:06 +0100 Subject: [PATCH 0115/2895] Revert "feat(assistant): Initial implementation of assistants api" (#1790) Revert "feat(assistant): Initial implementation of assistants api (#1761)" This reverts commit 4ab72146cd3e1b17455d3c5af5b3316e58e5ee47. --- api/openai/assistant.go | 470 ---------------------------------------- core/http/api.go | 20 -- 2 files changed, 490 deletions(-) delete mode 100644 api/openai/assistant.go diff --git a/api/openai/assistant.go b/api/openai/assistant.go deleted file mode 100644 index 58bb52f1..00000000 --- a/api/openai/assistant.go +++ /dev/null @@ -1,470 +0,0 @@ -package openai - -import ( - "fmt" - "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/options" - "github.com/gofiber/fiber/v2" - "github.com/google/uuid" - "github.com/rs/zerolog/log" - "net/http" - "sort" - "strconv" - "strings" - "time" -) - -// ToolType defines a type for tool options -type ToolType string - -const ( - CodeInterpreter ToolType = "code_interpreter" - Retrieval ToolType = "retrieval" - Function ToolType = "function" - - MaxCharacterInstructions = 32768 - MaxCharacterDescription = 512 - MaxCharacterName = 256 - MaxToolsSize = 128 - MaxFileIdSize = 20 - MaxCharacterMetadataKey = 64 - MaxCharacterMetadataValue = 512 - - MaxLengthRandomID = 0 -) - -type Tool struct { - Type ToolType `json:"type"` -} - -// Assistant represents the structure of an assistant object from the OpenAI API. -type Assistant struct { - ID string `json:"id"` // The unique identifier of the assistant. - Object string `json:"object"` // Object type, which is "assistant". - Created int64 `json:"created"` // The time at which the assistant was created. - Model string `json:"model"` // The model ID used by the assistant. - Name string `json:"name,omitempty"` // The name of the assistant. - Description string `json:"description,omitempty"` // The description of the assistant. - Instructions string `json:"instructions,omitempty"` // The system instructions that the assistant uses. - Tools []Tool `json:"tools,omitempty"` // A list of tools enabled on the assistant. 
- FileIDs []string `json:"file_ids,omitempty"` // A list of file IDs attached to this assistant. - Metadata map[string]string `json:"metadata,omitempty"` // Set of key-value pairs attached to the assistant. -} - -var ( - assistants = []Assistant{} // better to return empty array instead of "null" -) - -type AssistantRequest struct { - Model string `json:"model"` - Name string `json:"name,omitempty"` - Description string `json:"description,omitempty"` - Instructions string `json:"instructions,omitempty"` - Tools []Tool `json:"tools,omitempty"` - FileIDs []string `json:"file_ids,omitempty"` - Metadata map[string]string `json:"metadata,omitempty"` -} - -func CreateAssistantEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { - return func(c *fiber.Ctx) error { - request := new(AssistantRequest) - if err := c.BodyParser(request); err != nil { - log.Warn().AnErr("Unable to parse AssistantRequest", err) - return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"}) - } - - if !modelExists(o, request.Model) { - log.Warn().Msgf("Model: %s was not found in list of models.", request.Model) - return c.Status(fiber.StatusBadRequest).SendString("Model " + request.Model + " not found") - } - - assistant := Assistant{ - ID: "asst_" + generateRandomID(MaxLengthRandomID), - Object: "assistant", - Created: time.Now().Unix(), - Model: request.Model, - Name: request.Name, - Description: request.Description, - Instructions: request.Instructions, - Tools: request.Tools, - FileIDs: request.FileIDs, - Metadata: request.Metadata, - } - - assistants = append(assistants, assistant) - - return c.Status(fiber.StatusOK).JSON(assistant) - } -} - -func generateRandomID(maxLength int) string { - newUUID, err := uuid.NewUUID() - if err != nil { - log.Error().Msgf("Failed to generate UUID: %v", err) - return "" - } - - uuidStr := newUUID.String() - if maxLength > 0 && len(uuidStr) > maxLength { - return uuidStr[:maxLength] - } - return uuidStr -} - -func ListAssistantsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { - return func(c *fiber.Ctx) error { - // Parse query parameters - limitQuery := c.Query("limit", "20") - orderQuery := c.Query("order", "desc") - afterQuery := c.Query("after") - beforeQuery := c.Query("before") - - // Convert string limit to integer - limit, err := strconv.Atoi(limitQuery) - if err != nil { - return c.Status(http.StatusBadRequest).SendString(err.Error()) - } - - // Sort assistants - sort.SliceStable(assistants, func(i, j int) bool { - if orderQuery == "asc" { - return assistants[i].Created < assistants[j].Created - } - return assistants[i].Created > assistants[j].Created - }) - - // After and before cursors - if afterQuery != "" { - assistants = filterAssistantsAfterID(assistants, afterQuery) - } - if beforeQuery != "" { - assistants = filterAssistantsBeforeID(assistants, beforeQuery) - } - - // Apply limit - if limit < len(assistants) { - assistants = assistants[:limit] - } - - return c.JSON(assistants) - } -} - -// FilterAssistantsBeforeID filters out those assistants whose ID comes before the given ID -// We assume that the assistants are already sorted -func filterAssistantsBeforeID(assistants []Assistant, id string) []Assistant { - for i, assistant := range assistants { - if strings.Compare(assistant.ID, id) == 0 { - if i != 0 { - return assistants[:i] - } - return []Assistant{} - } - } - return assistants -} - -// FilterAssistantsAfterID filters out those assistants whose ID comes after the given ID -// We 
assume that the assistants are already sorted -func filterAssistantsAfterID(assistants []Assistant, id string) []Assistant { - for i, assistant := range assistants { - if strings.Compare(assistant.ID, id) == 0 { - if i != len(assistants)-1 { - return assistants[i+1:] - } - return []Assistant{} - } - } - return assistants -} - -func modelExists(o *options.Option, modelName string) (found bool) { - found = false - models, err := o.Loader.ListModels() - if err != nil { - return - } - - for _, model := range models { - if model == modelName { - found = true - return - } - } - return -} - -func DeleteAssistantEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { - type DeleteAssistantResponse struct { - ID string `json:"id"` - Object string `json:"object"` - Deleted bool `json:"deleted"` - } - - return func(c *fiber.Ctx) error { - assistantID := c.Params("assistant_id") - if assistantID == "" { - return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required") - } - - for i, assistant := range assistants { - if assistant.ID == assistantID { - assistants = append(assistants[:i], assistants[i+1:]...) - return c.Status(fiber.StatusOK).JSON(DeleteAssistantResponse{ - ID: assistantID, - Object: "assistant.deleted", - Deleted: true, - }) - } - } - - log.Warn().Msgf("Unable to find assistant %s for deletion", assistantID) - return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantResponse{ - ID: assistantID, - Object: "assistant.deleted", - Deleted: false, - }) - } -} - -func GetAssistantEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { - return func(c *fiber.Ctx) error { - assistantID := c.Params("assistant_id") - if assistantID == "" { - return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required") - } - - for _, assistant := range assistants { - if assistant.ID == assistantID { - return c.Status(fiber.StatusOK).JSON(assistant) - } - } - - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant with id: %s", assistantID)) - } -} - -type AssistantFile struct { - ID string `json:"id"` - Object string `json:"object"` - CreatedAt int64 `json:"created_at"` - AssistantID string `json:"assistant_id"` -} - -var assistantFiles []AssistantFile - -func CreateAssistantFileEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { - type AssistantFileRequest struct { - FileID string `json:"file_id"` - } - - return func(c *fiber.Ctx) error { - request := new(AssistantFileRequest) - if err := c.BodyParser(request); err != nil { - return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"}) - } - - assistantID := c.Query("assistant_id") - if assistantID == "" { - return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required") - } - - for _, assistant := range assistants { - if assistant.ID == assistantID { - if len(assistant.FileIDs) > MaxFileIdSize { - return c.Status(fiber.StatusBadRequest).SendString(fmt.Sprintf("Max files %d for assistant %s reached.", MaxFileIdSize, assistant.Name)) - } - - for _, file := range uploadedFiles { - if file.ID == request.FileID { - assistant.FileIDs = append(assistant.FileIDs, request.FileID) - assistantFile := AssistantFile{ - ID: file.ID, - Object: "assistant.file", - CreatedAt: time.Now().Unix(), - AssistantID: assistant.ID, - } - assistantFiles = append(assistantFiles, assistantFile) - return c.Status(fiber.StatusOK).JSON(assistantFile) - } - } - - return 
c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find file_id: %s", request.FileID)) - } - } - - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find ")) - } -} - -func ListAssistantFilesEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { - type ListAssistantFiles struct { - Data []File - Object string - } - - return func(c *fiber.Ctx) error { - assistantID := c.Params("assistant_id") - if assistantID == "" { - return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required") - } - - limitQuery := c.Query("limit", "20") - order := c.Query("order", "desc") - limit, err := strconv.Atoi(limitQuery) - if err != nil || limit < 1 || limit > 100 { - limit = 20 // Default to 20 if there's an error or the limit is out of bounds - } - - // Sort files by CreatedAt depending on the order query parameter - if order == "asc" { - sort.Slice(assistantFiles, func(i, j int) bool { - return assistantFiles[i].CreatedAt < assistantFiles[j].CreatedAt - }) - } else { // default to "desc" - sort.Slice(assistantFiles, func(i, j int) bool { - return assistantFiles[i].CreatedAt > assistantFiles[j].CreatedAt - }) - } - - // Limit the number of files returned - var limitedFiles []AssistantFile - hasMore := false - if len(assistantFiles) > limit { - hasMore = true - limitedFiles = assistantFiles[:limit] - } else { - limitedFiles = assistantFiles - } - - response := map[string]interface{}{ - "object": "list", - "data": limitedFiles, - "first_id": func() string { - if len(limitedFiles) > 0 { - return limitedFiles[0].ID - } - return "" - }(), - "last_id": func() string { - if len(limitedFiles) > 0 { - return limitedFiles[len(limitedFiles)-1].ID - } - return "" - }(), - "has_more": hasMore, - } - - return c.Status(fiber.StatusOK).JSON(response) - } -} - -func ModifyAssistantEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { - return func(c *fiber.Ctx) error { - request := new(AssistantRequest) - if err := c.BodyParser(request); err != nil { - log.Warn().AnErr("Unable to parse AssistantRequest", err) - return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"}) - } - - assistantID := c.Params("assistant_id") - if assistantID == "" { - return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required") - } - - for i, assistant := range assistants { - if assistant.ID == assistantID { - newAssistant := Assistant{ - ID: assistantID, - Object: assistant.Object, - Created: assistant.Created, - Model: request.Model, - Name: request.Name, - Description: request.Description, - Instructions: request.Instructions, - Tools: request.Tools, - FileIDs: request.FileIDs, // todo: should probably verify fileids exist - Metadata: request.Metadata, - } - - // Remove old one and replace with new one - assistants = append(assistants[:i], assistants[i+1:]...) 
- assistants = append(assistants, newAssistant) - return c.Status(fiber.StatusOK).JSON(newAssistant) - } - } - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant with id: %s", assistantID)) - } -} - -func DeleteAssistantFileEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { - type DeleteAssistantFileResponse struct { - ID string `json:"id"` - Object string `json:"object"` - Deleted bool `json:"deleted"` - } - return func(c *fiber.Ctx) error { - assistantID := c.Params("assistant_id") - fileId := c.Params("file_id") - if assistantID == "" { - return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id and file_id are required") - } - // First remove file from assistant - for i, assistant := range assistants { - if assistant.ID == assistantID { - for j, fileId := range assistant.FileIDs { - if fileId == fileId { - assistants[i].FileIDs = append(assistants[i].FileIDs[:j], assistants[i].FileIDs[j+1:]...) - - // Check if the file exists in the assistantFiles slice - for i, assistantFile := range assistantFiles { - if assistantFile.ID == fileId { - // Remove the file from the assistantFiles slice - assistantFiles = append(assistantFiles[:i], assistantFiles[i+1:]...) - return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{ - ID: fileId, - Object: "assistant.file.deleted", - Deleted: true, - }) - } - } - } - } - - log.Warn().Msgf("Unable to locate file_id: %s in assistants: %s", fileId, assistantID) - return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantFileResponse{ - ID: fileId, - Object: "assistant.file.deleted", - Deleted: false, - }) - } - } - log.Warn().Msgf("Unable to find assistant: %s", assistantID) - - return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantFileResponse{ - ID: fileId, - Object: "assistant.file.deleted", - Deleted: false, - }) - } -} - -func GetAssistantFileEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error { - return func(c *fiber.Ctx) error { - assistantID := c.Params("assistant_id") - fileId := c.Params("file_id") - if assistantID == "" { - return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id and file_id are required") - } - - for _, assistantFile := range assistantFiles { - if assistantFile.AssistantID == assistantID { - if assistantFile.ID == fileId { - return c.Status(fiber.StatusOK).JSON(assistantFile) - } - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant file with file_id: %s", fileId)) - } - } - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant file with assistant_id: %s", assistantID)) - } -} diff --git a/core/http/api.go b/core/http/api.go index c010b6ca..e2646a14 100644 --- a/core/http/api.go +++ b/core/http/api.go @@ -157,26 +157,6 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) - // assistant - app.Get("/v1/assistants", openai.ListAssistantsEndpoint(cl, options)) - app.Get("/assistants", openai.ListAssistantsEndpoint(cl, options)) - app.Post("/v1/assistants", openai.CreateAssistantEndpoint(cl, options)) - app.Post("/assistants", openai.CreateAssistantEndpoint(cl, options)) - app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, options)) - app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, options)) - 
app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, options)) - app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, options)) - app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, options)) - app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, options)) - app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, options)) - app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, options)) - app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, options)) - app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, options)) - app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, options)) - app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, options)) - app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, options)) - app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, options)) - // files app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) From d0bd961bdec00a1da1441d6fb5794ae9ef5df5e9 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 4 Mar 2024 09:44:21 +0100 Subject: [PATCH 0116/2895] :arrow_up: Update ggerganov/llama.cpp (#1791) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 683b62ce..c628fdf5 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=4a6e2d6142ab815c964924896891e9ab3e050632 +CPPLLAMA_VERSION?=67be2ce1015d070b3b2cd488bcb041eefb61de72 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From c8e29033c2eaca9b2534197a697070f3af8c355a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 5 Mar 2024 08:59:09 +0100 Subject: [PATCH 0117/2895] :arrow_up: Update ggerganov/llama.cpp (#1794) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c628fdf5..82cfc535 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=67be2ce1015d070b3b2cd488bcb041eefb61de72 +CPPLLAMA_VERSION?=e0843afe1b37890b631bc7d3d2da2ed36c862b91 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 333f9180050035e56594407ed61f905252bf059e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 5 Mar 2024 09:45:54 +0100 Subject: [PATCH 0118/2895] Update integrations.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/integrations.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/docs/integrations.md b/docs/content/docs/integrations.md index 36cfec2d..d71abd85 100644 --- a/docs/content/docs/integrations.md +++ b/docs/content/docs/integrations.md @@ -14,7 +14,7 @@ The list below is a list of software that integrates with LocalAI. 
- [AnythingLLM](https://github.com/Mintplex-Labs/anything-llm) - [Logseq GPT3 OpenAI plugin](https://github.com/briansunter/logseq-plugin-gpt3-openai) allows to set a base URL, and works with LocalAI. -- [https://plugins.jetbrains.com/plugin/21056-codegpt] allows for custom OpenAI compatible endpoints since 2.4.0 +- https://plugins.jetbrains.com/plugin/21056-codegpt allows for custom OpenAI compatible endpoints since 2.4.0 - https://github.com/longy2k/obsidian-bmo-chatbot - https://github.com/FlowiseAI/Flowise - https://github.com/k8sgpt-ai/k8sgpt From e586dc29247b753a244da44d4c664ae67859032a Mon Sep 17 00:00:00 2001 From: Luna Midori <118759930+lunamidori5@users.noreply.github.com> Date: Tue, 5 Mar 2024 01:14:30 -0800 Subject: [PATCH 0119/2895] Edit links in readme and integrations page (#1796) * Update integrations.md Signed-off-by: Luna Midori <118759930+lunamidori5@users.noreply.github.com> * Update README.md Signed-off-by: Luna Midori <118759930+lunamidori5@users.noreply.github.com> * Update README.md Co-authored-by: Ettore Di Giacinto Signed-off-by: Luna Midori <118759930+lunamidori5@users.noreply.github.com> * Update README.md Co-authored-by: Ettore Di Giacinto Signed-off-by: Luna Midori <118759930+lunamidori5@users.noreply.github.com> --------- Signed-off-by: Luna Midori <118759930+lunamidori5@users.noreply.github.com> Co-authored-by: Ettore Di Giacinto --- README.md | 4 +--- docs/content/docs/integrations.md | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e9460e79..082da33e 100644 --- a/README.md +++ b/README.md @@ -99,9 +99,6 @@ WebUIs: Model galleries - https://github.com/go-skynet/model-gallery - -UI / Management Programs -- [LocalAI Manager](https://io.midori-ai.xyz/howtos/easy-model-installer/) Other: - Helm chart https://github.com/go-skynet/helm-charts @@ -112,6 +109,7 @@ Other: - Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack - Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot - Examples: https://github.com/mudler/LocalAI/tree/master/examples/ + ### 🔗 Resources diff --git a/docs/content/docs/integrations.md b/docs/content/docs/integrations.md index d71abd85..29f2db17 100644 --- a/docs/content/docs/integrations.md +++ b/docs/content/docs/integrations.md @@ -26,5 +26,6 @@ The list below is a list of software that integrates with LocalAI. - https://github.com/charmbracelet/mods - https://github.com/cedriking/spark - [Big AGI](https://github.com/enricoros/big-agi) is a powerful web interface entirely running in the browser, supporting LocalAI +- [Midori AI Subsystem Manager](https://io.midori-ai.xyz/subsystem/manager/) is a powerful docker subsystem for running all types of AI programs Feel free to open up a Pull request (by clicking at the "Edit page" below) to get a page for your project made or if you see a error on one of the pages! 
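One behavior worth noting in the assistants patch above (PATCH 0114, reverted wholesale in PATCH 0115): ListAssistantsEndpoint sorts, cursor-filters, and truncates the package-level assistants slice itself, so every paginated GET /v1/assistants permanently drops entries from server state instead of merely shaping the response. The Go sketch below is an editor's illustration of a non-destructive variant, not code from any patch in this series; the standalone paginateAssistants helper and the trimmed-down Assistant type are hypothetical names introduced only for this example.

package main

import (
	"fmt"
	"sort"
)

// Assistant is reduced to the two fields pagination actually needs;
// the real struct in the reverted patch carries many more.
type Assistant struct {
	ID      string
	Created int64
}

// paginateAssistants returns one page of results without mutating the
// caller's slice, mirroring the limit/order/after/before semantics of
// the reverted handler.
func paginateAssistants(all []Assistant, limit int, order, after, before string) []Assistant {
	page := make([]Assistant, len(all))
	copy(page, all) // operate on a copy, never on the shared slice

	sort.SliceStable(page, func(i, j int) bool {
		if order == "asc" {
			return page[i].Created < page[j].Created
		}
		return page[i].Created > page[j].Created
	})

	// Cursor handling, following filterAssistantsAfterID/BeforeID:
	// an unknown cursor leaves the page unchanged, a matching cursor
	// keeps only the entries strictly after (or before) it.
	if after != "" {
		for i := range page {
			if page[i].ID == after {
				page = page[i+1:]
				break
			}
		}
	}
	if before != "" {
		for i := range page {
			if page[i].ID == before {
				page = page[:i]
				break
			}
		}
	}

	if limit > 0 && limit < len(page) {
		page = page[:limit]
	}
	return page
}

func main() {
	all := []Assistant{
		{ID: "asst_a", Created: 1},
		{ID: "asst_b", Created: 2},
		{ID: "asst_c", Created: 3},
	}
	fmt.Println(paginateAssistants(all, 2, "desc", "", "")) // [{asst_c 3} {asst_b 2}]
	fmt.Println(len(all))                                   // still 3: the shared slice is untouched
}

Two smaller issues in the same reverted file are visible in the hunks above: DeleteAssistantFileEndpoint declares for j, fileId := range assistant.FileIDs and then tests if fileId == fileId, which is always true because the loop variable shadows the fileId taken from c.Params("file_id"); and CreateAssistantFileEndpoint falls through to fmt.Sprintf("Unable to find ") with no arguments, leaving a truncated error message. If the feature is re-landed, both spots deserve a second look.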
From 504f2e8bf41630096d9408f48394db05114c0aa5 Mon Sep 17 00:00:00 2001 From: TwinFin <57421631+TwinFinz@users.noreply.github.com> Date: Tue, 5 Mar 2024 05:10:00 -0500 Subject: [PATCH 0120/2895] Update Backend Dependancies (#1797) * Update transformers.yml Signed-off-by: TwinFin <57421631+TwinFinz@users.noreply.github.com> * Update transformers-rocm.yml Signed-off-by: TwinFin <57421631+TwinFinz@users.noreply.github.com> * Update transformers-nvidia.yml Signed-off-by: TwinFin <57421631+TwinFinz@users.noreply.github.com> --------- Signed-off-by: TwinFin <57421631+TwinFinz@users.noreply.github.com> --- .../common-env/transformers/transformers-nvidia.yml | 6 +++--- .../python/common-env/transformers/transformers-rocm.yml | 6 +++--- backend/python/common-env/transformers/transformers.yml | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/backend/python/common-env/transformers/transformers-nvidia.yml b/backend/python/common-env/transformers/transformers-nvidia.yml index 3565d4ad..f851677e 100644 --- a/backend/python/common-env/transformers/transformers-nvidia.yml +++ b/backend/python/common-env/transformers/transformers-nvidia.yml @@ -36,7 +36,7 @@ dependencies: - TTS==0.22.0 - charset-normalizer==3.3.0 - datasets==2.14.5 - - sentence-transformers==2.2.2 + - sentence-transformers==2.5.1 # Updated Version - sentencepiece==0.1.99 - dill==0.3.7 - einops==0.7.0 @@ -82,7 +82,7 @@ dependencies: - rouge==1.0.1 - s3transfer==0.7.0 - safetensors>=0.4.1 - - scipy==1.11.3 + - scipy==1.12.0 # Updated Version - six==1.16.0 - sympy==1.12 - tokenizers @@ -114,6 +114,6 @@ dependencies: - sudachidict_core - vocos - vllm==0.3.2 - - transformers>=4.38.0 # Required for Gemma. + - transformers>=4.38.2 # Updated Version - xformers==0.0.23.post1 prefix: /opt/conda/envs/transformers diff --git a/backend/python/common-env/transformers/transformers-rocm.yml b/backend/python/common-env/transformers/transformers-rocm.yml index 6e0dc4ce..5c18d301 100644 --- a/backend/python/common-env/transformers/transformers-rocm.yml +++ b/backend/python/common-env/transformers/transformers-rocm.yml @@ -38,7 +38,7 @@ dependencies: - TTS==0.22.0 - charset-normalizer==3.3.0 - datasets==2.14.5 - - sentence-transformers==2.2.2 + - sentence-transformers==2.5.1 # Updated Version - sentencepiece==0.1.99 - dill==0.3.7 - einops==0.7.0 @@ -72,7 +72,7 @@ dependencies: - rouge==1.0.1 - s3transfer==0.7.0 - safetensors>=0.4.1 - - scipy==1.11.3 + - scipy==1.12.0 # Updated Version - six==1.16.0 - sympy==1.12 - tokenizers @@ -104,6 +104,6 @@ dependencies: - sudachidict_core - vocos - vllm==0.3.2 - - transformers>=4.38.0 # Required for Gemma. 
+ - transformers>=4.38.2 # Updated Version - xformers==0.0.23.post1 prefix: /opt/conda/envs/transformers diff --git a/backend/python/common-env/transformers/transformers.yml b/backend/python/common-env/transformers/transformers.yml index 44e6efd6..5726abaf 100644 --- a/backend/python/common-env/transformers/transformers.yml +++ b/backend/python/common-env/transformers/transformers.yml @@ -36,7 +36,7 @@ dependencies: - TTS==0.22.0 - charset-normalizer==3.3.0 - datasets==2.14.5 - - sentence-transformers==2.2.2 + - sentence-transformers==2.5.1 # Updated Version - sentencepiece==0.1.99 - dill==0.3.7 - einops==0.7.0 @@ -70,7 +70,7 @@ dependencies: - rouge==1.0.1 - s3transfer==0.7.0 - safetensors>=0.4.1 - - scipy==1.11.3 + - scipy==1.12.0 # Updated Version - six==1.16.0 - sympy==1.12 - tokenizers @@ -102,6 +102,6 @@ dependencies: - sudachidict_core - vocos - vllm==0.3.2 - - transformers>=4.38.0 # Required for Gemma. + - transformers>=4.38.2 # Updated Version - xformers==0.0.23.post1 -prefix: /opt/conda/envs/transformers \ No newline at end of file +prefix: /opt/conda/envs/transformers From 5c69dd155fd1a7ab471db562a63ea564cd9b1a61 Mon Sep 17 00:00:00 2001 From: Dave Date: Tue, 5 Mar 2024 13:47:15 -0500 Subject: [PATCH 0121/2895] feat(autogpt/transformers): consume `trust_remote_code` (#1799) trusting remote code by default is a danger to our users --- backend/python/autogptq/autogptq.py | 2 +- backend/python/transformers/transformers_server.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/python/autogptq/autogptq.py b/backend/python/autogptq/autogptq.py index db44f507..ffb37569 100755 --- a/backend/python/autogptq/autogptq.py +++ b/backend/python/autogptq/autogptq.py @@ -33,7 +33,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): model = AutoGPTQForCausalLM.from_quantized(request.Model, model_basename=request.ModelBaseName, use_safetensors=True, - trust_remote_code=True, + trust_remote_code=request.TrustRemoteCode, device=device, use_triton=request.UseTriton, quantize_config=None) diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index 1b177057..fe0b815a 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -69,9 +69,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): model_name = request.Model try: if request.Type == "AutoModelForCausalLM": - self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True) + self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode) else: - self.model = AutoModel.from_pretrained(model_name, trust_remote_code=True) + self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode) self.tokenizer = AutoTokenizer.from_pretrained(model_name) self.CUDA = False From db7f4955a1243f63bd065eaf1fc196b4923cd0d8 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 5 Mar 2024 22:50:27 +0100 Subject: [PATCH 0122/2895] :arrow_up: Update ggerganov/llama.cpp (#1801) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 82cfc535..c9134a0d 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 
-CPPLLAMA_VERSION?=e0843afe1b37890b631bc7d3d2da2ed36c862b91 +CPPLLAMA_VERSION?=bd836944f826f07e19b7edcf994a78728da49c1c # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From e022b5959ea409586bcead3473bbe8c180b9d2bf Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 6 Mar 2024 00:39:57 +0100 Subject: [PATCH 0123/2895] :arrow_up: Update mudler/go-stable-diffusion (#1802) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c9134a0d..b24ed797 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d PIPER_VERSION?=d6b6275ba037dabdba4a8b65dfdf6b2a73a67f07 # stablediffusion version -STABLEDIFFUSION_VERSION?=d5d2be8e7e395c2d73ceef61e6fe8d240f2cd831 +STABLEDIFFUSION_VERSION?=362df9da29f882dbf09ade61972d16a1f53c3485 # tinydream version TINYDREAM_VERSION?=772a9c0d9aaf768290e63cca3c904fe69faf677a From ad6fd7a9914cbdfee19f7ac2eb235e5f420546aa Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 6 Mar 2024 23:28:31 +0100 Subject: [PATCH 0124/2895] :arrow_up: Update ggerganov/llama.cpp (#1805) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b24ed797..5159431a 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=bd836944f826f07e19b7edcf994a78728da49c1c +CPPLLAMA_VERSION?=e25fb4b18fcedb9bed6be4585cf842e9a669b28b # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 5d1018495f919732c37977fa57cf2d9914e3bc6b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 7 Mar 2024 14:37:45 +0100 Subject: [PATCH 0125/2895] feat(intel): add diffusers/transformers support (#1746) * feat(intel): add diffusers support * try to consume upstream container image * Debug * Manually install deps * Map transformers/hf cache dir to modelpath if not specified * fix(compel): update initialization, pass by all gRPC options * fix: add dependencies, implement transformers for xpu * base it from the oneapi image * Add pillow * set threads if specified when launching the API * Skip conda install if intel * defaults to non-intel * ci: add to pipelines * prepare compel only if enabled * Skip conda install if intel * fix cleanup * Disable compel by default * Install torch 2.1.0 with Intel * Skip conda on some setups * Detect python * Quiet output * Do not override system python with conda * Prefer python3 * Fixups * exllama2: do not install without conda (overrides pytorch version) * exllama/exllama2: do not install if not using cuda * Add missing dataset dependency * Small fixups, symlink to python, add requirements * Add neural_speed to the deps * correctly handle model offloading * fix: device_map == xpu * go back at calling python, fixed at dockerfile level * Exllama2 restricted to only nvidia gpus * Tokenizer to xpu --- .github/workflows/image-pr.yml | 10 +++- .github/workflows/image.yml | 16 +++++++ Dockerfile | 34 ++++++++------ Makefile | 7 +++ .../python/common-env/transformers/Makefile | 7 +++ .../python/common-env/transformers/install.sh | 34 ++++++++++---- backend/python/diffusers/Makefile | 7 +++ backend/python/diffusers/backend_diffusers.py | 25 +++++++--- 
backend/python/diffusers/install.sh | 46 +++++++++++++++---- backend/python/diffusers/run.sh | 13 ++++-- backend/python/exllama/install.sh | 5 ++ backend/python/exllama2/install.sh | 8 +++- backend/python/mamba/install.sh | 3 +- backend/python/petals/Makefile | 2 +- backend/python/petals/install.sh | 5 ++ backend/python/transformers/run.sh | 12 +++-- .../transformers/transformers_server.py | 27 ++++++++++- backend/python/vall-e-x/Makefile | 4 ++ backend/python/vall-e-x/install.sh | 11 +++-- core/backend/image.go | 23 +++------- core/backend/llm.go | 7 ++- core/backend/options.go | 18 ++++++-- pkg/model/initializers.go | 7 +++ 23 files changed, 250 insertions(+), 81 deletions(-) create mode 100644 backend/python/petals/install.sh diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 527a8479..2e9a0afe 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -59,6 +59,14 @@ jobs: image-type: 'extras' base-image: "rocm/dev-ubuntu-22.04:6.0-complete" runs-on: 'arc-runner-set' + - build-type: 'sycl_f16' + platforms: 'linux/amd64' + tag-latest: 'false' + base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + tag-suffix: 'sycl-f16-ffmpeg' + ffmpeg: 'true' + image-type: 'extras' + runs-on: 'arc-runner-set' core-image-build: uses: ./.github/workflows/image_build.yml with: @@ -105,4 +113,4 @@ jobs: ffmpeg: 'true' image-type: 'core' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:22.04" \ No newline at end of file diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index a9620baa..2a7fac27 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -120,6 +120,22 @@ jobs: image-type: 'extras' base-image: "rocm/dev-ubuntu-22.04:6.0-complete" runs-on: 'arc-runner-set' + - build-type: 'sycl_f16' + platforms: 'linux/amd64' + tag-latest: 'false' + base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + tag-suffix: '-sycl-f16-ffmpeg' + ffmpeg: 'true' + image-type: 'extras' + runs-on: 'arc-runner-set' + - build-type: 'sycl_f32' + platforms: 'linux/amd64' + tag-latest: 'false' + base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + tag-suffix: '-sycl-f32-ffmpeg' + ffmpeg: 'true' + image-type: 'extras' + runs-on: 'arc-runner-set' # Core images - build-type: 'sycl_f16' platforms: 'linux/amd64' diff --git a/Dockerfile b/Dockerfile index a04a866e..fd365962 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,6 +4,8 @@ ARG BASE_IMAGE=ubuntu:22.04 # extras or core FROM ${BASE_IMAGE} as requirements-core +USER root + ARG GO_VERSION=1.21.7 ARG BUILD_TYPE ARG CUDA_MAJOR_VERSION=11 @@ -21,7 +23,7 @@ RUN apt-get update && \ apt-get install -y ca-certificates curl patch pip cmake git && apt-get clean # Install Go -RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -v -C /usr/local -xz +RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -C /usr/local -xz ENV PATH $PATH:/usr/local/go/bin COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/ @@ -79,6 +81,10 @@ RUN pip install --upgrade pip RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y RUN apt-get install -y espeak-ng espeak && apt-get clean +RUN if [ ! 
-e /usr/bin/python ]; then \ + ln -s /usr/bin/python3 /usr/bin/python \ + ; fi + ################################### ################################### @@ -166,43 +172,43 @@ COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/ ## Duplicated from Makefile to avoid having a big layer that's hard to push RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/autogptq \ + make -C backend/python/autogptq \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/bark \ + make -C backend/python/bark \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/diffusers \ + make -C backend/python/diffusers \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/vllm \ + make -C backend/python/vllm \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/mamba \ + make -C backend/python/mamba \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers \ + make -C backend/python/sentencetransformers \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/transformers \ + make -C backend/python/transformers \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/vall-e-x \ + make -C backend/python/vall-e-x \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/exllama \ + make -C backend/python/exllama \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/exllama2 \ + make -C backend/python/exllama2 \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/petals \ + make -C backend/python/petals \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/transformers-musicgen \ + make -C backend/python/transformers-musicgen \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/coqui \ + make -C backend/python/coqui \ ; fi # Make sure the models directory exists diff --git a/Makefile b/Makefile index 5159431a..f91fb47e 100644 --- a/Makefile +++ b/Makefile @@ -557,3 +557,10 @@ docker-image-intel: --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="none" \ --build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) . + +docker-image-intel-xpu: + docker build \ + --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \ + --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ + --build-arg GO_TAGS="none" \ + --build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) . \ No newline at end of file diff --git a/backend/python/common-env/transformers/Makefile b/backend/python/common-env/transformers/Makefile index 1cd71ab1..797af083 100644 --- a/backend/python/common-env/transformers/Makefile +++ b/backend/python/common-env/transformers/Makefile @@ -8,6 +8,13 @@ ifeq ($(BUILD_TYPE), hipblas) CONDA_ENV_PATH = "transformers-rocm.yml" endif +# Intel GPU are supposed to have dependencies installed in the main python +# environment, so we skip conda installation for SYCL builds. 
+# https://github.com/intel/intel-extension-for-pytorch/issues/538 +ifneq (,$(findstring sycl,$(BUILD_TYPE))) +export SKIP_CONDA=1 +endif + .PHONY: transformers transformers: @echo "Installing $(CONDA_ENV_PATH)..." diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh index 42965bdb..e268fcc8 100644 --- a/backend/python/common-env/transformers/install.sh +++ b/backend/python/common-env/transformers/install.sh @@ -1,24 +1,38 @@ #!/bin/bash set -ex +SKIP_CONDA=${SKIP_CONDA:-0} + # Check if environment exist conda_env_exists(){ ! conda list --name "${@}" >/dev/null 2>/dev/null } -if conda_env_exists "transformers" ; then - echo "Creating virtual environment..." - conda env create --name transformers --file $1 - echo "Virtual environment created." -else - echo "Virtual environment already exists." +if [ $SKIP_CONDA -eq 1 ]; then + echo "Skipping conda environment installation" +else + export PATH=$PATH:/opt/conda/bin + if conda_env_exists "transformers" ; then + echo "Creating virtual environment..." + conda env create --name transformers --file $1 + echo "Virtual environment created." + else + echo "Virtual environment already exists." + fi +fi + +if [ -d "/opt/intel" ]; then + # Intel GPU: If the directory exists, we assume we are using the intel image + # (no conda env) + # https://github.com/intel/intel-extension-for-pytorch/issues/538 + pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed fi if [ "$PIP_CACHE_PURGE" = true ] ; then - export PATH=$PATH:/opt/conda/bin - - # Activate conda environment - source activate transformers + if [ $SKIP_CONDA -eq 0 ]; then + # Activate conda environment + source activate transformers + fi pip cache purge fi \ No newline at end of file diff --git a/backend/python/diffusers/Makefile b/backend/python/diffusers/Makefile index 70a62b60..40e1d1a7 100644 --- a/backend/python/diffusers/Makefile +++ b/backend/python/diffusers/Makefile @@ -4,6 +4,13 @@ ifeq ($(BUILD_TYPE), hipblas) export CONDA_ENV_PATH = "diffusers-rocm.yml" endif +# Intel GPU are supposed to have dependencies installed in the main python +# environment, so we skip conda installation for SYCL builds. +# https://github.com/intel/intel-extension-for-pytorch/issues/538 +ifneq (,$(findstring sycl,$(BUILD_TYPE))) +export SKIP_CONDA=1 +endif + .PHONY: diffusers diffusers: @echo "Installing $(CONDA_ENV_PATH)..." 
diff --git a/backend/python/diffusers/backend_diffusers.py b/backend/python/diffusers/backend_diffusers.py index 6780cae6..ec2dea60 100755 --- a/backend/python/diffusers/backend_diffusers.py +++ b/backend/python/diffusers/backend_diffusers.py @@ -21,14 +21,15 @@ from diffusers import StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipelin from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline from diffusers.pipelines.stable_diffusion import safety_checker from diffusers.utils import load_image,export_to_video -from compel import Compel +from compel import Compel, ReturnedEmbeddingsType from transformers import CLIPTextModel from safetensors.torch import load_file _ONE_DAY_IN_SECONDS = 60 * 60 * 24 -COMPEL=os.environ.get("COMPEL", "1") == "1" +COMPEL=os.environ.get("COMPEL", "0") == "1" +XPU=os.environ.get("XPU", "0") == "1" CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1" SAFETENSORS=os.environ.get("SAFETENSORS", "1") == "1" CHUNK_SIZE=os.environ.get("CHUNK_SIZE", "8") @@ -36,6 +37,10 @@ FPS=os.environ.get("FPS", "7") DISABLE_CPU_OFFLOAD=os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1" FRAMES=os.environ.get("FRAMES", "64") +if XPU: + import intel_extension_for_pytorch as ipex + print(ipex.xpu.get_device_name(0)) + # If MAX_WORKERS are specified in the environment use it, otherwise default to 1 MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) @@ -231,8 +236,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if request.SchedulerType != "": self.pipe.scheduler = get_scheduler(request.SchedulerType, self.pipe.scheduler.config) - if not self.img2vid: - self.compel = Compel(tokenizer=self.pipe.tokenizer, text_encoder=self.pipe.text_encoder) + if COMPEL: + self.compel = Compel( + tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2 ], + text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2], + returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, + requires_pooled=[False, True] + ) if request.ControlNet: @@ -247,6 +257,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): self.pipe.to('cuda') if self.controlnet: self.controlnet.to('cuda') + if XPU: + self.pipe = self.pipe.to("xpu") # Assume directory from request.ModelFile. # Only if request.LoraAdapter it's not an absolute path if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter: @@ -386,8 +398,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): image = {} if COMPEL: - conditioning = self.compel.build_conditioning_tensor(prompt) - kwargs["prompt_embeds"]= conditioning + conditioning, pooled = self.compel.build_conditioning_tensor(prompt) + kwargs["prompt_embeds"] = conditioning + kwargs["pooled_prompt_embeds"] = pooled # pass the kwargs dictionary to the self.pipe method image = self.pipe( guidance_scale=self.cfg_scale, diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh index 0429826e..d83ec0be 100755 --- a/backend/python/diffusers/install.sh +++ b/backend/python/diffusers/install.sh @@ -1,24 +1,50 @@ #!/bin/bash set -ex +SKIP_CONDA=${SKIP_CONDA:-0} + # Check if environment exist conda_env_exists(){ ! conda list --name "${@}" >/dev/null 2>/dev/null } -if conda_env_exists "diffusers" ; then - echo "Creating virtual environment..." - conda env create --name diffusers --file $1 - echo "Virtual environment created." -else - echo "Virtual environment already exists." 
+if [ $SKIP_CONDA -eq 1 ]; then + echo "Skipping conda environment installation" +else + export PATH=$PATH:/opt/conda/bin + if conda_env_exists "diffusers" ; then + echo "Creating virtual environment..." + conda env create --name diffusers --file $1 + echo "Virtual environment created." + else + echo "Virtual environment already exists." + fi +fi + +if [ -d "/opt/intel" ]; then + # Intel GPU: If the directory exists, we assume we are using the Intel image + # https://github.com/intel/intel-extension-for-pytorch/issues/538 + pip install torch==2.1.0a0 \ + torchvision==0.16.0a0 \ + torchaudio==2.1.0a0 \ + intel-extension-for-pytorch==2.1.10+xpu \ + --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ + + pip install google-api-python-client \ + grpcio \ + grpcio-tools \ + diffusers==0.24.0 \ + transformers>=4.25.1 \ + accelerate \ + compel==2.0.2 \ + Pillow fi if [ "$PIP_CACHE_PURGE" = true ] ; then - export PATH=$PATH:/opt/conda/bin - - # Activate conda environment - source activate diffusers + if [ $SKIP_CONDA -ne 1 ]; then + # Activate conda environment + source activate diffusers + fi pip cache purge fi \ No newline at end of file diff --git a/backend/python/diffusers/run.sh b/backend/python/diffusers/run.sh index 8e3e1bbf..69b25d50 100755 --- a/backend/python/diffusers/run.sh +++ b/backend/python/diffusers/run.sh @@ -3,10 +3,15 @@ ## ## A bash script wrapper that runs the diffusers server with conda -export PATH=$PATH:/opt/conda/bin - -# Activate conda environment -source activate diffusers +if [ -d "/opt/intel" ]; then + # Assumes we are using the Intel oneAPI container image + # https://github.com/intel/intel-extension-for-pytorch/issues/538 + export XPU=1 +else + export PATH=$PATH:/opt/conda/bin + # Activate conda environment + source activate diffusers +fi # get the directory where the bash script is located DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" diff --git a/backend/python/exllama/install.sh b/backend/python/exllama/install.sh index 702bb1fb..320e7f4d 100755 --- a/backend/python/exllama/install.sh +++ b/backend/python/exllama/install.sh @@ -3,6 +3,11 @@ set -ex export PATH=$PATH:/opt/conda/bin +if [ "$BUILD_TYPE" != "cublas" ]; then + echo "[exllama] Attention!!! Nvidia GPU is required - skipping installation" + exit 0 +fi + # Check if environment exist conda_env_exists(){ ! conda list --name "${@}" >/dev/null 2>/dev/null diff --git a/backend/python/exllama2/install.sh b/backend/python/exllama2/install.sh index a6df3d37..858685b0 100755 --- a/backend/python/exllama2/install.sh +++ b/backend/python/exllama2/install.sh @@ -2,10 +2,14 @@ set -e ## ## A bash script installs the required dependencies of VALL-E-X and prepares the environment -export PATH=$PATH:/opt/conda/bin export SHA=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f -# Activate conda environment +if [ "$BUILD_TYPE" != "cublas" ]; then + echo "[exllamav2] Attention!!! Nvidia GPU is required - skipping installation" + exit 0 +fi + +export PATH=$PATH:/opt/conda/bin source activate transformers echo $CONDA_PREFIX diff --git a/backend/python/mamba/install.sh b/backend/python/mamba/install.sh index e56b83c2..4ef26ece 100755 --- a/backend/python/mamba/install.sh +++ b/backend/python/mamba/install.sh @@ -2,13 +2,14 @@ set -e ## ## A bash script installs the required dependencies of VALL-E-X and prepares the environment -export PATH=$PATH:/opt/conda/bin if [ "$BUILD_TYPE" != "cublas" ]; then echo "[mamba] Attention!!! 
nvcc is required - skipping installation" exit 0 fi +export PATH=$PATH:/opt/conda/bin + # Activate conda environment source activate transformers diff --git a/backend/python/petals/Makefile b/backend/python/petals/Makefile index 4bd07b11..aa7778e1 100644 --- a/backend/python/petals/Makefile +++ b/backend/python/petals/Makefile @@ -1,7 +1,7 @@ .PHONY: petals petals: @echo "Creating virtual environment..." - @conda env create --name petals --file petals.yml + bash install.sh "petals.yml" @echo "Virtual environment created." .PHONY: run diff --git a/backend/python/petals/install.sh b/backend/python/petals/install.sh new file mode 100644 index 00000000..97bcbb8a --- /dev/null +++ b/backend/python/petals/install.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +export PATH=$PATH:/opt/conda/bin + +conda env create --name petals --file $1 \ No newline at end of file diff --git a/backend/python/transformers/run.sh b/backend/python/transformers/run.sh index e6a42b7e..d09c1f5c 100755 --- a/backend/python/transformers/run.sh +++ b/backend/python/transformers/run.sh @@ -3,10 +3,16 @@ ## ## A bash script wrapper that runs the transformers server with conda -export PATH=$PATH:/opt/conda/bin -# Activate conda environment -source activate transformers +if [ -d "/opt/intel" ]; then + # Assumes we are using the Intel oneAPI container image + # https://github.com/intel/intel-extension-for-pytorch/issues/538 + export XPU=1 +else + export PATH=$PATH:/opt/conda/bin + # Activate conda environment + source activate transformers +fi # get the directory where the bash script is located DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index fe0b815a..41112c44 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -16,7 +16,15 @@ import backend_pb2_grpc import grpc import torch import torch.cuda -from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed + +XPU=os.environ.get("XPU", "0") == "1" +if XPU: + import intel_extension_for_pytorch as ipex + from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM + from transformers import AutoTokenizer, AutoModel, set_seed +else: + from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed + _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -69,12 +77,25 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): model_name = request.Model try: if request.Type == "AutoModelForCausalLM": - self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode) + if XPU: + self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode, + device_map="xpu", load_in_4bit=True) + else: + self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode) else: self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode) self.tokenizer = AutoTokenizer.from_pretrained(model_name) self.CUDA = False + self.XPU = False + + if XPU: + self.XPU = True + try: + print("Optimizing model", model_name, "to XPU.", file=sys.stderr) + self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu") + except Exception as err: + print("Not using XPU:", err, file=sys.stderr) if request.CUDA or torch.cuda.is_available(): try: @@ -139,6 +160,8 @@ class 
BackendServicer(backend_pb2_grpc.BackendServicer): inputs = self.tokenizer(request.Prompt, return_tensors="pt").input_ids if self.CUDA: inputs = inputs.to("cuda") + if XPU: + inputs = inputs.to("xpu") outputs = self.model.generate(inputs,max_new_tokens=max_tokens, temperature=request.Temperature, top_p=request.TopP) diff --git a/backend/python/vall-e-x/Makefile b/backend/python/vall-e-x/Makefile index 4804f12f..8f34f559 100644 --- a/backend/python/vall-e-x/Makefile +++ b/backend/python/vall-e-x/Makefile @@ -1,3 +1,7 @@ +ifneq (,$(findstring sycl,$(BUILD_TYPE))) +export SKIP_CONDA=1 +endif + .PHONY: ttsvalle ttsvalle: $(MAKE) -C ../common-env/transformers diff --git a/backend/python/vall-e-x/install.sh b/backend/python/vall-e-x/install.sh index 26ccdccd..a9c4117e 100644 --- a/backend/python/vall-e-x/install.sh +++ b/backend/python/vall-e-x/install.sh @@ -2,13 +2,16 @@ ## ## A bash script installs the required dependencies of VALL-E-X and prepares the environment -export PATH=$PATH:/opt/conda/bin export SHA=3faaf8ccadb154d63b38070caf518ce9309ea0f4 -# Activate conda environment -source activate transformers +SKIP_CONDA=${SKIP_CONDA:-0} -echo $CONDA_PREFIX +if [ $SKIP_CONDA -ne 1 ]; then + source activate transformers +else + export PATH=$PATH:/opt/conda/bin + CONDA_PREFIX=$PWD +fi git clone https://github.com/Plachtaa/VALL-E-X.git $CONDA_PREFIX/vall-e-x && pushd $CONDA_PREFIX/vall-e-x && git checkout -b build $SHA && popd diff --git a/core/backend/image.go b/core/backend/image.go index 60db48f9..79b8d4ba 100644 --- a/core/backend/image.go +++ b/core/backend/image.go @@ -8,27 +8,18 @@ import ( ) func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) { - + threads := backendConfig.Threads + if threads == 0 && appConfig.Threads != 0 { + threads = appConfig.Threads + } + gRPCOpts := gRPCModelOpts(backendConfig) opts := modelOpts(backendConfig, appConfig, []model.Option{ model.WithBackendString(backendConfig.Backend), model.WithAssetDir(appConfig.AssetsDestination), - model.WithThreads(uint32(backendConfig.Threads)), + model.WithThreads(uint32(threads)), model.WithContext(appConfig.Context), model.WithModel(backendConfig.Model), - model.WithLoadGRPCLoadModelOpts(&proto.ModelOptions{ - CUDA: backendConfig.CUDA || backendConfig.Diffusers.CUDA, - SchedulerType: backendConfig.Diffusers.SchedulerType, - PipelineType: backendConfig.Diffusers.PipelineType, - CFGScale: backendConfig.Diffusers.CFGScale, - LoraAdapter: backendConfig.LoraAdapter, - LoraScale: backendConfig.LoraScale, - LoraBase: backendConfig.LoraBase, - IMG2IMG: backendConfig.Diffusers.IMG2IMG, - CLIPModel: backendConfig.Diffusers.ClipModel, - CLIPSubfolder: backendConfig.Diffusers.ClipSubFolder, - CLIPSkip: int32(backendConfig.Diffusers.ClipSkip), - ControlNet: backendConfig.Diffusers.ControlNet, - }), + model.WithLoadGRPCLoadModelOpts(gRPCOpts), }) inferenceModel, err := loader.BackendLoader( diff --git a/core/backend/llm.go b/core/backend/llm.go index f16878c0..54e26188 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -28,7 +28,10 @@ type TokenUsage struct { func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) { modelFile := c.Model - + threads := c.Threads + if threads == 0 && 
o.Threads != 0 { + threads = o.Threads + } grpcOpts := gRPCModelOpts(c) var inferenceModel grpc.Backend @@ -36,7 +39,7 @@ func ModelInference(ctx context.Context, s string, images []string, loader *mode opts := modelOpts(c, o, []model.Option{ model.WithLoadGRPCLoadModelOpts(grpcOpts), - model.WithThreads(uint32(c.Threads)), // some models uses this to allocate threads during startup + model.WithThreads(uint32(threads)), // some models uses this to allocate threads during startup model.WithAssetDir(o.AssetsDestination), model.WithModel(modelFile), model.WithContext(o.Context), diff --git a/core/backend/options.go b/core/backend/options.go index d2bbb2b8..3af6f679 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -40,11 +40,23 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { } return &pb.ModelOptions{ + CUDA: c.CUDA || c.Diffusers.CUDA, + SchedulerType: c.Diffusers.SchedulerType, + PipelineType: c.Diffusers.PipelineType, + CFGScale: c.Diffusers.CFGScale, + LoraAdapter: c.LoraAdapter, + LoraScale: c.LoraScale, + F16Memory: c.F16, + LoraBase: c.LoraBase, + IMG2IMG: c.Diffusers.IMG2IMG, + CLIPModel: c.Diffusers.ClipModel, + CLIPSubfolder: c.Diffusers.ClipSubFolder, + CLIPSkip: int32(c.Diffusers.ClipSkip), + ControlNet: c.Diffusers.ControlNet, ContextSize: int32(c.ContextSize), Seed: int32(c.Seed), NBatch: int32(b), NoMulMatQ: c.NoMulMatQ, - CUDA: c.CUDA, // diffusers, transformers DraftModel: c.DraftModel, AudioPath: c.VallE.AudioPath, Quantization: c.Quantization, @@ -58,12 +70,8 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { YarnAttnFactor: c.YarnAttnFactor, YarnBetaFast: c.YarnBetaFast, YarnBetaSlow: c.YarnBetaSlow, - LoraAdapter: c.LoraAdapter, - LoraBase: c.LoraBase, - LoraScale: c.LoraScale, NGQA: c.NGQA, RMSNormEps: c.RMSNormEps, - F16Memory: c.F16, MLock: c.MMlock, RopeFreqBase: c.RopeFreqBase, RopeScaling: c.RopeScaling, diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index fce44fe1..1e2af8f9 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -69,6 +69,13 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string return fmt.Sprintf("127.0.0.1:%d", port), nil } + // If no specific model path is set for transformers/HF, set it to the model path + for _, env := range []string{"HF_HOME", "TRANSFORMERS_CACHE", "HUGGINGFACE_HUB_CACHE"} { + if os.Getenv(env) == "" { + os.Setenv(env, ml.ModelPath) + } + } + // Check if the backend is provided as external if uri, ok := o.externalBackends[backend]; ok { log.Debug().Msgf("Loading external backend: %s", uri) From dc919e08e83f02f5e4b05655094bd1440cb0837a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 8 Mar 2024 08:21:25 +0100 Subject: [PATCH 0126/2895] :arrow_up: Update ggerganov/llama.cpp (#1811) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f91fb47e..3c33db5d 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=e25fb4b18fcedb9bed6be4585cf842e9a669b28b +CPPLLAMA_VERSION?=6cdabe652695167263c8b447520987b11856f7ca # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 595a73fce4839f5d383713b631743b14827083ef Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 
9 Mar 2024 09:27:06 +0100 Subject: [PATCH 0127/2895] :arrow_up: Update ggerganov/llama.cpp (#1813) Signed-off-by: GitHub Co-authored-by: mudler
--- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile index 3c33db5d..efd89a25 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=6cdabe652695167263c8b447520987b11856f7ca +CPPLLAMA_VERSION?=515f7d0d4fce41c752fc253acf30707c3be2531e # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From a08cc5adbb16adb8027cb0526e1b14a9473c9721 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 10 Mar 2024 09:32:09 +0100 Subject: [PATCH 0128/2895] :arrow_up: Update ggerganov/llama.cpp (#1816) Signed-off-by: GitHub Co-authored-by: mudler
--- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile index efd89a25..ed3e2683 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=515f7d0d4fce41c752fc253acf30707c3be2531e +CPPLLAMA_VERSION?=77d1ac7e00bf049b9f2bba1b5a310a78318c49c4 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From 8e57f4df318d3a2f126cc89acb99c37b9abc6557 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 11 Mar 2024 00:02:37 +0100 Subject: [PATCH 0129/2895] :arrow_up: Update ggerganov/llama.cpp (#1818) Signed-off-by: GitHub Co-authored-by: mudler
--- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile index ed3e2683..4e6d61fc 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=77d1ac7e00bf049b9f2bba1b5a310a78318c49c4 +CPPLLAMA_VERSION?=7ab7b733bb48250b2df26c12b00256ef42c76932 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From bc8f648a91a2f07b061b3ae1ee1944131c61cda2 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 11 Mar 2024 19:49:03 +0100 Subject: [PATCH 0130/2895] fix(doc/examples): set defaults to mirostat (#1820)
The default sampler on some models doesn't return enough candidates, which leads to a false sense of randomness. Tracing back through the code, it looks like with the temperature sampler there might not be enough candidates to pick from, and since the seed and "randomness" take effect while picking a good candidate, this yields the same results over and over.
Fixes https://github.com/mudler/LocalAI/issues/1723 by updating the examples and documentation to use mirostat instead.
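For intuition about why this patch moves the examples to mirostat: rather than sampling from a fixed truncated candidate set, mirostat v2 keeps a running surprise threshold mu and nudges it toward a target cross-entropy tau after every token, so the pool of eligible candidates adapts instead of collapsing to the same one or two tokens. Below is a loose numpy sketch of that feedback loop, following the published mirostat v2 algorithm; it is illustration only, not LocalAI's actual sampler (that lives in llama.cpp), and the tau/eta defaults merely mirror the mirostat_tau/mirostat_eta knobs set in the configs.

```python
# Loose sketch of one mirostat v2 sampling step (after Basu et al., 2020).
# Illustration only: LocalAI's real sampler is llama.cpp's implementation.
import numpy as np

def mirostat_v2_step(logits, mu, tau=5.0, eta=0.1, rng=None):
    rng = rng or np.random.default_rng()
    probs = np.exp(logits - logits.max())
    probs /= probs.sum()
    surprise = -np.log2(probs)           # per-token surprise, in bits
    keep = np.nonzero(surprise < mu)[0]  # candidates under the running threshold
    if keep.size == 0:                   # always keep at least the top token
        keep = np.array([int(np.argmax(probs))])
    p = probs[keep] / probs[keep].sum()
    token = int(rng.choice(keep, p=p))
    mu -= eta * (surprise[token] - tau)  # feedback: steer surprise toward tau
    return token, mu
```

Because mu is re-estimated on every step, the kept-candidate set cannot permanently collapse to a single token, which is the repeated-output failure mode described above.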
--- docs/content/docs/advanced/advanced-usage.md | 7 ++++--- embedded/models/codellama-7b-gguf.yaml | 6 +++++- embedded/models/dolphin-2.5-mixtral-8x7b.yaml | 5 ++++- embedded/models/llava.yaml | 3 +++ embedded/models/mistral-openorca.yaml | 4 ++++ embedded/models/mixtral-instruct.yaml | 4 ++++ examples/configurations/phi-2.yaml | 4 ++++ 7 files changed, 28 insertions(+), 5 deletions(-) diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md index a892cc36..c9926bab 100644 --- a/docs/content/docs/advanced/advanced-usage.md +++ b/docs/content/docs/advanced/advanced-usage.md @@ -130,13 +130,14 @@ parameters: typical_p: tfz: frequency_penalty: - mirostat_eta: - mirostat_tau: - mirostat: + rope_freq_base: rope_freq_scale: negative_prompt_scale: +mirostat_eta: +mirostat_tau: +mirostat: # Default context size context_size: 512 # Default number of threads diff --git a/embedded/models/codellama-7b-gguf.yaml b/embedded/models/codellama-7b-gguf.yaml index d2a6b518..413c838b 100644 --- a/embedded/models/codellama-7b-gguf.yaml +++ b/embedded/models/codellama-7b-gguf.yaml @@ -2,10 +2,14 @@ name: codellama-7b-gguf backend: transformers parameters: model: huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_K_M.gguf - temperature: 0.2 + temperature: 0.5 top_k: 40 seed: -1 top_p: 0.95 +mirostat: 2 +mirostat_eta: 1.0 +mirostat_tau: 1.0 + context_size: 4096 f16: true gpu_layers: 90 diff --git a/embedded/models/dolphin-2.5-mixtral-8x7b.yaml b/embedded/models/dolphin-2.5-mixtral-8x7b.yaml index ba6020c0..12ee1efc 100644 --- a/embedded/models/dolphin-2.5-mixtral-8x7b.yaml +++ b/embedded/models/dolphin-2.5-mixtral-8x7b.yaml @@ -2,10 +2,13 @@ name: dolphin-mixtral-8x7b mmap: true parameters: model: huggingface://TheBloke/dolphin-2.5-mixtral-8x7b-GGUF/dolphin-2.5-mixtral-8x7b.Q2_K.gguf - temperature: 0.2 + temperature: 0.5 top_k: 40 top_p: 0.95 seed: -1 +mirostat: 2 +mirostat_eta: 1.0 +mirostat_tau: 1.0 template: chat_message: | <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} diff --git a/embedded/models/llava.yaml b/embedded/models/llava.yaml index 2e571f21..3d240681 100644 --- a/embedded/models/llava.yaml +++ b/embedded/models/llava.yaml @@ -18,6 +18,9 @@ parameters: top_k: 40 top_p: 0.95 seed: -1 +mirostat: 2 +mirostat_eta: 1.0 +mirostat_tau: 1.0 template: chat: | diff --git a/embedded/models/mistral-openorca.yaml b/embedded/models/mistral-openorca.yaml index f40d854f..0794a69b 100644 --- a/embedded/models/mistral-openorca.yaml +++ b/embedded/models/mistral-openorca.yaml @@ -6,6 +6,10 @@ parameters: top_k: 40 top_p: 0.95 seed: -1 +mirostat: 2 +mirostat_eta: 1.0 +mirostat_tau: 1.0 + template: chat_message: | <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} diff --git a/embedded/models/mixtral-instruct.yaml b/embedded/models/mixtral-instruct.yaml index 3272557a..246b2324 100644 --- a/embedded/models/mixtral-instruct.yaml +++ b/embedded/models/mixtral-instruct.yaml @@ -6,6 +6,10 @@ parameters: top_k: 40 seed: -1 top_p: 0.95 +mirostat: 2 +mirostat_eta: 1.0 +mirostat_tau: 1.0 + template: chat: &chat | [INST] {{.Input}} [/INST] diff --git a/examples/configurations/phi-2.yaml b/examples/configurations/phi-2.yaml index cac1e9da..e5a13442 100644 --- a/examples/configurations/phi-2.yaml +++ b/examples/configurations/phi-2.yaml @@ -11,6 +11,10 @@ parameters: top_k: 40 top_p: 0.95 seed: -1 + +mirostat: 
2 +mirostat_eta: 1.0 +mirostat_tau: 1.0 template: chat: &template |- Instruct: {{.Input}}
From f895d066055728e2744044ce6390a222bc24d095 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 13 Mar 2024 10:05:30 +0100 Subject: [PATCH 0131/2895] fix(config): set better defaults for inferencing (#1822)
* fix(defaults): set better defaults for inferencing
This changeset aims to have better defaults and to properly detect when no inference settings are provided with the model. If not specified, we default to mirostat sampling and offload all the GPU layers (if a GPU is detected).
Related to https://github.com/mudler/LocalAI/issues/1373 and https://github.com/mudler/LocalAI/issues/1723
* Adapt tests
* Also pre-initialize default seed
--- core/backend/embeddings.go | 2 +- core/backend/image.go | 6 +- core/backend/llm.go | 6 +- core/backend/options.go | 45 ++--- core/backend/transcript.go | 4 +- core/config/backend_config.go | 240 +++++++++++++++++++------- core/http/api_test.go | 2 +- core/http/endpoints/localai/tts.go | 9 +- core/http/endpoints/openai/image.go | 2 +- core/http/endpoints/openai/request.go | 33 ++-- core/schema/prediction.go | 17 +- main.go | 2 +- 12 files changed, 235 insertions(+), 133 deletions(-)
diff --git a/core/backend/embeddings.go b/core/backend/embeddings.go index 0a74ea4c..94310854 100644 --- a/core/backend/embeddings.go +++ b/core/backend/embeddings.go @@ -23,7 +23,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendCo opts := modelOpts(backendConfig, appConfig, []model.Option{ model.WithLoadGRPCLoadModelOpts(grpcOpts), - model.WithThreads(uint32(backendConfig.Threads)), + model.WithThreads(uint32(*backendConfig.Threads)), model.WithAssetDir(appConfig.AssetsDestination), model.WithModel(modelFile), model.WithContext(appConfig.Context),
diff --git a/core/backend/image.go b/core/backend/image.go index 79b8d4ba..b0cffb0b 100644 --- a/core/backend/image.go +++ b/core/backend/image.go @@ -9,14 +9,14 @@ import ( ) func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) { threads := backendConfig.Threads - if threads == 0 && appConfig.Threads != 0 { - threads = appConfig.Threads + if *threads == 0 && appConfig.Threads != 0 { + threads = &appConfig.Threads } gRPCOpts := gRPCModelOpts(backendConfig) opts := modelOpts(backendConfig, appConfig, []model.Option{ model.WithBackendString(backendConfig.Backend), model.WithAssetDir(appConfig.AssetsDestination), - model.WithThreads(uint32(threads)), + model.WithThreads(uint32(*threads)), model.WithContext(appConfig.Context), model.WithModel(backendConfig.Model), model.WithLoadGRPCLoadModelOpts(gRPCOpts),
diff --git a/core/backend/llm.go b/core/backend/llm.go index 54e26188..d5e14df0 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -29,8 +29,8 @@ type TokenUsage struct { func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) { modelFile := c.Model threads := c.Threads - if threads == 0 && o.Threads != 0 { - threads = o.Threads + if *threads == 0 && o.Threads != 0 { + threads = &o.Threads } grpcOpts := gRPCModelOpts(c) @@ -39,7 +39,7 @@ func ModelInference(ctx context.Context, s string, images []string, loader *mode opts := modelOpts(c, o, 
[]model.Option{ model.WithLoadGRPCLoadModelOpts(grpcOpts), - model.WithThreads(uint32(threads)), // some models uses this to allocate threads during startup + model.WithThreads(uint32(*threads)), // some models uses this to allocate threads during startup model.WithAssetDir(o.AssetsDestination), model.WithModel(modelFile), model.WithContext(o.Context), diff --git a/core/backend/options.go b/core/backend/options.go index 3af6f679..bc7fa5a4 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -46,15 +46,15 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { CFGScale: c.Diffusers.CFGScale, LoraAdapter: c.LoraAdapter, LoraScale: c.LoraScale, - F16Memory: c.F16, + F16Memory: *c.F16, LoraBase: c.LoraBase, IMG2IMG: c.Diffusers.IMG2IMG, CLIPModel: c.Diffusers.ClipModel, CLIPSubfolder: c.Diffusers.ClipSubFolder, CLIPSkip: int32(c.Diffusers.ClipSkip), ControlNet: c.Diffusers.ControlNet, - ContextSize: int32(c.ContextSize), - Seed: int32(c.Seed), + ContextSize: int32(*c.ContextSize), + Seed: int32(*c.Seed), NBatch: int32(b), NoMulMatQ: c.NoMulMatQ, DraftModel: c.DraftModel, @@ -72,18 +72,18 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { YarnBetaSlow: c.YarnBetaSlow, NGQA: c.NGQA, RMSNormEps: c.RMSNormEps, - MLock: c.MMlock, + MLock: *c.MMlock, RopeFreqBase: c.RopeFreqBase, RopeScaling: c.RopeScaling, Type: c.ModelType, RopeFreqScale: c.RopeFreqScale, NUMA: c.NUMA, Embeddings: c.Embeddings, - LowVRAM: c.LowVRAM, - NGPULayers: int32(c.NGPULayers), - MMap: c.MMap, + LowVRAM: *c.LowVRAM, + NGPULayers: int32(*c.NGPULayers), + MMap: *c.MMap, MainGPU: c.MainGPU, - Threads: int32(c.Threads), + Threads: int32(*c.Threads), TensorSplit: c.TensorSplit, // AutoGPTQ ModelBaseName: c.AutoGPTQ.ModelBaseName, @@ -102,36 +102,37 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption os.MkdirAll(filepath.Dir(p), 0755) promptCachePath = p } + return &pb.PredictOptions{ - Temperature: float32(c.Temperature), - TopP: float32(c.TopP), + Temperature: float32(*c.Temperature), + TopP: float32(*c.TopP), NDraft: c.NDraft, - TopK: int32(c.TopK), - Tokens: int32(c.Maxtokens), - Threads: int32(c.Threads), + TopK: int32(*c.TopK), + Tokens: int32(*c.Maxtokens), + Threads: int32(*c.Threads), PromptCacheAll: c.PromptCacheAll, PromptCacheRO: c.PromptCacheRO, PromptCachePath: promptCachePath, - F16KV: c.F16, - DebugMode: c.Debug, + F16KV: *c.F16, + DebugMode: *c.Debug, Grammar: c.Grammar, NegativePromptScale: c.NegativePromptScale, RopeFreqBase: c.RopeFreqBase, RopeFreqScale: c.RopeFreqScale, NegativePrompt: c.NegativePrompt, - Mirostat: int32(c.LLMConfig.Mirostat), - MirostatETA: float32(c.LLMConfig.MirostatETA), - MirostatTAU: float32(c.LLMConfig.MirostatTAU), - Debug: c.Debug, + Mirostat: int32(*c.LLMConfig.Mirostat), + MirostatETA: float32(*c.LLMConfig.MirostatETA), + MirostatTAU: float32(*c.LLMConfig.MirostatTAU), + Debug: *c.Debug, StopPrompts: c.StopWords, Repeat: int32(c.RepeatPenalty), NKeep: int32(c.Keep), Batch: int32(c.Batch), IgnoreEOS: c.IgnoreEOS, - Seed: int32(c.Seed), + Seed: int32(*c.Seed), FrequencyPenalty: float32(c.FrequencyPenalty), - MLock: c.MMlock, - MMap: c.MMap, + MLock: *c.MMlock, + MMap: *c.MMap, MainGPU: c.MainGPU, TensorSplit: c.TensorSplit, TailFreeSamplingZ: float32(c.TFZ), diff --git a/core/backend/transcript.go b/core/backend/transcript.go index bbb4f4b4..4c3859df 100644 --- a/core/backend/transcript.go +++ b/core/backend/transcript.go @@ -17,7 +17,7 @@ func ModelTranscription(audio, language string, ml *model.ModelLoader, 
backendCo model.WithBackendString(model.WhisperBackend), model.WithModel(backendConfig.Model), model.WithContext(appConfig.Context), - model.WithThreads(uint32(backendConfig.Threads)), + model.WithThreads(uint32(*backendConfig.Threads)), model.WithAssetDir(appConfig.AssetsDestination), }) @@ -33,6 +33,6 @@ func ModelTranscription(audio, language string, ml *model.ModelLoader, backendCo return whisperModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{ Dst: audio, Language: language, - Threads: uint32(backendConfig.Threads), + Threads: uint32(*backendConfig.Threads), }) } diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 63e5855c..53326b3f 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "io/fs" + "math/rand" "os" "path/filepath" "strings" @@ -20,9 +21,9 @@ type BackendConfig struct { schema.PredictionOptions `yaml:"parameters"` Name string `yaml:"name"` - F16 bool `yaml:"f16"` - Threads int `yaml:"threads"` - Debug bool `yaml:"debug"` + F16 *bool `yaml:"f16"` + Threads *int `yaml:"threads"` + Debug *bool `yaml:"debug"` Roles map[string]string `yaml:"roles"` Embeddings bool `yaml:"embeddings"` Backend string `yaml:"backend"` @@ -105,20 +106,20 @@ type LLMConfig struct { PromptCachePath string `yaml:"prompt_cache_path"` PromptCacheAll bool `yaml:"prompt_cache_all"` PromptCacheRO bool `yaml:"prompt_cache_ro"` - MirostatETA float64 `yaml:"mirostat_eta"` - MirostatTAU float64 `yaml:"mirostat_tau"` - Mirostat int `yaml:"mirostat"` - NGPULayers int `yaml:"gpu_layers"` - MMap bool `yaml:"mmap"` - MMlock bool `yaml:"mmlock"` - LowVRAM bool `yaml:"low_vram"` + MirostatETA *float64 `yaml:"mirostat_eta"` + MirostatTAU *float64 `yaml:"mirostat_tau"` + Mirostat *int `yaml:"mirostat"` + NGPULayers *int `yaml:"gpu_layers"` + MMap *bool `yaml:"mmap"` + MMlock *bool `yaml:"mmlock"` + LowVRAM *bool `yaml:"low_vram"` Grammar string `yaml:"grammar"` StopWords []string `yaml:"stopwords"` Cutstrings []string `yaml:"cutstrings"` TrimSpace []string `yaml:"trimspace"` TrimSuffix []string `yaml:"trimsuffix"` - ContextSize int `yaml:"context_size"` + ContextSize *int `yaml:"context_size"` NUMA bool `yaml:"numa"` LoraAdapter string `yaml:"lora_adapter"` LoraBase string `yaml:"lora_base"` @@ -185,19 +186,96 @@ func (c *BackendConfig) FunctionToCall() string { return c.functionCallNameString } -func defaultPredictOptions(modelFile string) schema.PredictionOptions { - return schema.PredictionOptions{ - TopP: 0.7, - TopK: 80, - Maxtokens: 512, - Temperature: 0.9, - Model: modelFile, - } -} +func (cfg *BackendConfig) SetDefaults(debug bool, threads, ctx int, f16 bool) { + defaultTopP := 0.7 + defaultTopK := 80 + defaultTemp := 0.9 + defaultMaxTokens := 2048 + defaultMirostat := 2 + defaultMirostatTAU := 5.0 + defaultMirostatETA := 0.1 -func DefaultConfig(modelFile string) *BackendConfig { - return &BackendConfig{ - PredictionOptions: defaultPredictOptions(modelFile), + // Try to offload all GPU layers (if GPU is found) + defaultNGPULayers := 99999999 + + trueV := true + falseV := false + + if cfg.Seed == nil { + // random number generator seed + defaultSeed := int(rand.Int31()) + cfg.Seed = &defaultSeed + } + + if cfg.TopK == nil { + cfg.TopK = &defaultTopK + } + + if cfg.MMap == nil { + // MMap is enabled by default + cfg.MMap = &trueV + } + + if cfg.MMlock == nil { + // MMlock is disabled by default + cfg.MMlock = &falseV + } + + if cfg.TopP == nil { + cfg.TopP = &defaultTopP + } + if cfg.Temperature 
== nil { + cfg.Temperature = &defaultTemp + } + + if cfg.Maxtokens == nil { + cfg.Maxtokens = &defaultMaxTokens + } + + if cfg.Mirostat == nil { + cfg.Mirostat = &defaultMirostat + } + + if cfg.MirostatETA == nil { + cfg.MirostatETA = &defaultMirostatETA + } + + if cfg.MirostatTAU == nil { + cfg.MirostatTAU = &defaultMirostatTAU + } + if cfg.NGPULayers == nil { + cfg.NGPULayers = &defaultNGPULayers + } + + if cfg.LowVRAM == nil { + cfg.LowVRAM = &falseV + } + + // Value passed by the top level are treated as default (no implicit defaults) + // defaults are set by the user + if ctx == 0 { + ctx = 1024 + } + + if cfg.ContextSize == nil { + cfg.ContextSize = &ctx + } + + if threads == 0 { + // Threads can't be 0 + threads = 4 + } + + if cfg.Threads == nil { + cfg.Threads = &threads + } + + if cfg.F16 == nil { + cfg.F16 = &f16 + } + + if debug { + cfg.Debug = &debug } } @@ -208,23 +286,63 @@ type BackendConfigLoader struct { sync.Mutex } +type LoadOptions struct { + debug bool + threads, ctxSize int + f16 bool +} + +func LoadOptionDebug(debug bool) ConfigLoaderOption { + return func(o *LoadOptions) { + o.debug = debug + } +} + +func LoadOptionThreads(threads int) ConfigLoaderOption { + return func(o *LoadOptions) { + o.threads = threads + } +} + +func LoadOptionContextSize(ctxSize int) ConfigLoaderOption { + return func(o *LoadOptions) { + o.ctxSize = ctxSize + } +} + +func LoadOptionF16(f16 bool) ConfigLoaderOption { + return func(o *LoadOptions) { + o.f16 = f16 + } +} + +type ConfigLoaderOption func(*LoadOptions) + +func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) { + for _, l := range options { + l(lo) + } +} + // Load a config file for a model -func LoadBackendConfigFileByName(modelName, modelPath string, cl *BackendConfigLoader, debug bool, threads, ctx int, f16 bool) (*BackendConfig, error) { +func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) { + + lo := &LoadOptions{} + lo.Apply(opts...) 
+ // Load a config file if present after the model name - modelConfig := filepath.Join(modelPath, modelName+".yaml") - - var cfg *BackendConfig - - defaults := func() { - cfg = DefaultConfig(modelName) - cfg.ContextSize = ctx - cfg.Threads = threads - cfg.F16 = f16 - cfg.Debug = debug + cfg := &BackendConfig{ + PredictionOptions: schema.PredictionOptions{ + Model: modelName, + }, } cfgExisting, exists := cl.GetBackendConfig(modelName) - if !exists { + if exists { + cfg = &cfgExisting + } else { + // Try loading a model config file + modelConfig := filepath.Join(modelPath, modelName+".yaml") if _, err := os.Stat(modelConfig); err == nil { if err := cl.LoadBackendConfig(modelConfig); err != nil { return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error()) @@ -232,32 +350,11 @@ func LoadBackendConfigFileByName(modelName, modelPath string, cl *BackendConfigL cfgExisting, exists = cl.GetBackendConfig(modelName) if exists { cfg = &cfgExisting - } else { - defaults() } - } else { - defaults() - } - } else { - cfg = &cfgExisting - } - - // Set the parameters for the language model prediction - //updateConfig(cfg, input) - - // Don't allow 0 as setting - if cfg.Threads == 0 { - if threads != 0 { - cfg.Threads = threads - } else { - cfg.Threads = 4 } } - // Enforce debug flag if passed from CLI - if debug { - cfg.Debug = true - } + cfg.SetDefaults(lo.debug, lo.threads, lo.ctxSize, lo.f16) return cfg, nil } @@ -267,7 +364,10 @@ func NewBackendConfigLoader() *BackendConfigLoader { configs: make(map[string]BackendConfig), } } -func ReadBackendConfigFile(file string) ([]*BackendConfig, error) { +func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) { + lo := &LoadOptions{} + lo.Apply(opts...) + c := &[]*BackendConfig{} f, err := os.ReadFile(file) if err != nil { @@ -277,10 +377,17 @@ func ReadBackendConfigFile(file string) ([]*BackendConfig, error) { return nil, fmt.Errorf("cannot unmarshal config file: %w", err) } + for _, cc := range *c { + cc.SetDefaults(lo.debug, lo.threads, lo.ctxSize, lo.f16) + } + return *c, nil } -func ReadBackendConfig(file string) (*BackendConfig, error) { +func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) { + lo := &LoadOptions{} + lo.Apply(opts...) + c := &BackendConfig{} f, err := os.ReadFile(file) if err != nil { @@ -290,13 +397,14 @@ func ReadBackendConfig(file string) (*BackendConfig, error) { return nil, fmt.Errorf("cannot unmarshal config file: %w", err) } + c.SetDefaults(lo.debug, lo.threads, lo.ctxSize, lo.f16) return c, nil } -func (cm *BackendConfigLoader) LoadBackendConfigFile(file string) error { +func (cm *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error { cm.Lock() defer cm.Unlock() - c, err := ReadBackendConfigFile(file) + c, err := ReadBackendConfigFile(file, opts...) if err != nil { return fmt.Errorf("cannot load config file: %w", err) } @@ -307,10 +415,10 @@ func (cm *BackendConfigLoader) LoadBackendConfigFile(file string) error { return nil } -func (cl *BackendConfigLoader) LoadBackendConfig(file string) error { +func (cl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error { cl.Lock() defer cl.Unlock() - c, err := ReadBackendConfig(file) + c, err := ReadBackendConfig(file, opts...) 
if err != nil { return fmt.Errorf("cannot read config file: %w", err) } @@ -407,7 +515,9 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error { return nil } -func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string) error { +// LoadBackendConfigsFromPath reads all the configurations of the models from a path +// (non-recursive) +func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error { cm.Lock() defer cm.Unlock() entries, err := os.ReadDir(path) @@ -427,7 +537,7 @@ func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string) error { if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") { continue } - c, err := ReadBackendConfig(filepath.Join(path, file.Name())) + c, err := ReadBackendConfig(filepath.Join(path, file.Name()), opts...) if err == nil { cm.configs[c.Name] = *c } diff --git a/core/http/api_test.go b/core/http/api_test.go index 8f3cfc91..b0579a19 100644 --- a/core/http/api_test.go +++ b/core/http/api_test.go @@ -386,7 +386,7 @@ var _ = Describe("API test", func() { var res map[string]string err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res) Expect(err).ToNot(HaveOccurred()) - Expect(res["location"]).To(Equal("San Francisco, California, United States"), fmt.Sprint(res)) + Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res)) Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason)) diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go index 84fb7a55..9c3f890d 100644 --- a/core/http/endpoints/localai/tts.go +++ b/core/http/endpoints/localai/tts.go @@ -26,7 +26,14 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi modelFile = input.Model log.Warn().Msgf("Model not found in context: %s", input.Model) } - cfg, err := config.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, cl, false, 0, 0, false) + + cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, + config.LoadOptionDebug(appConfig.Debug), + config.LoadOptionThreads(appConfig.Threads), + config.LoadOptionContextSize(appConfig.ContextSize), + config.LoadOptionF16(appConfig.F16), + ) + if err != nil { modelFile = input.Model log.Warn().Msgf("Model not found in context: %s", input.Model) diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go index 8f535801..d59b1051 100644 --- a/core/http/endpoints/openai/image.go +++ b/core/http/endpoints/openai/image.go @@ -196,7 +196,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon baseURL := c.BaseURL() - fn, err := backend.ImageGeneration(height, width, mode, step, input.Seed, positive_prompt, negative_prompt, src, output, ml, *config, appConfig) + fn, err := backend.ImageGeneration(height, width, mode, step, *config.Seed, positive_prompt, negative_prompt, src, output, ml, *config, appConfig) if err != nil { return err } diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go index 46ff2438..505244c4 100644 --- a/core/http/endpoints/openai/request.go +++ b/core/http/endpoints/openai/request.go @@ -74,10 +74,10 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque if input.Echo { config.Echo = input.Echo } - if input.TopK != 0 { + if input.TopK != nil { config.TopK = input.TopK } - if input.TopP 
!= 0 { + if input.TopP != nil { config.TopP = input.TopP } @@ -117,11 +117,11 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque config.Grammar = input.Grammar } - if input.Temperature != 0 { + if input.Temperature != nil { config.Temperature = input.Temperature } - if input.Maxtokens != 0 { + if input.Maxtokens != nil { config.Maxtokens = input.Maxtokens } @@ -193,30 +193,14 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque config.Batch = input.Batch } - if input.F16 { - config.F16 = input.F16 - } - if input.IgnoreEOS { config.IgnoreEOS = input.IgnoreEOS } - if input.Seed != 0 { + if input.Seed != nil { config.Seed = input.Seed } - if input.Mirostat != 0 { - config.LLMConfig.Mirostat = input.Mirostat - } - - if input.MirostatETA != 0 { - config.LLMConfig.MirostatETA = input.MirostatETA - } - - if input.MirostatTAU != 0 { - config.LLMConfig.MirostatTAU = input.MirostatTAU - } - if input.TypicalP != 0 { config.TypicalP = input.TypicalP } @@ -272,7 +256,12 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque } func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) { - cfg, err := config.LoadBackendConfigFileByName(modelFile, loader.ModelPath, cm, debug, threads, ctx, f16) + cfg, err := cm.LoadBackendConfigFileByName(modelFile, loader.ModelPath, + config.LoadOptionDebug(debug), + config.LoadOptionThreads(threads), + config.LoadOptionContextSize(ctx), + config.LoadOptionF16(f16), + ) // Set the parameters for the language model prediction updateRequestConfig(cfg, input) diff --git a/core/schema/prediction.go b/core/schema/prediction.go index efd085a4..d75e5eb8 100644 --- a/core/schema/prediction.go +++ b/core/schema/prediction.go @@ -12,28 +12,23 @@ type PredictionOptions struct { N int `json:"n"` // Common options between all the API calls, part of the OpenAI spec - TopP float64 `json:"top_p" yaml:"top_p"` - TopK int `json:"top_k" yaml:"top_k"` - Temperature float64 `json:"temperature" yaml:"temperature"` - Maxtokens int `json:"max_tokens" yaml:"max_tokens"` - Echo bool `json:"echo"` + TopP *float64 `json:"top_p" yaml:"top_p"` + TopK *int `json:"top_k" yaml:"top_k"` + Temperature *float64 `json:"temperature" yaml:"temperature"` + Maxtokens *int `json:"max_tokens" yaml:"max_tokens"` + Echo bool `json:"echo"` // Custom parameters - not present in the OpenAI API Batch int `json:"batch" yaml:"batch"` - F16 bool `json:"f16" yaml:"f16"` IgnoreEOS bool `json:"ignore_eos" yaml:"ignore_eos"` RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"` Keep int `json:"n_keep" yaml:"n_keep"` - MirostatETA float64 `json:"mirostat_eta" yaml:"mirostat_eta"` - MirostatTAU float64 `json:"mirostat_tau" yaml:"mirostat_tau"` - Mirostat int `json:"mirostat" yaml:"mirostat"` - FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"` TFZ float64 `json:"tfz" yaml:"tfz"` TypicalP float64 `json:"typical_p" yaml:"typical_p"` - Seed int `json:"seed" yaml:"seed"` + Seed *int `json:"seed" yaml:"seed"` NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"` RopeFreqBase float32 `json:"rope_freq_base" yaml:"rope_freq_base"` diff --git a/main.go b/main.go index 237191cf..21560e5a 100644 --- a/main.go +++ b/main.go @@ -497,7 +497,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit 
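
The refactor above replaces the long positional signature of LoadBackendConfigFileByName (debug, threads, ctx, f16) with variadic functional options such as config.LoadOptionDebug and config.LoadOptionThreads. As a minimal sketch of the pattern, assuming an illustrative LoadOptions struct (the repository's actual internals may differ):

package config

// LoadOptions gathers what used to be positional arguments.
// The field set here is an illustrative assumption.
type LoadOptions struct {
	debug            bool
	threads, ctxSize int
	f16              bool
}

// ConfigLoaderOption mutates a LoadOptions; callers pass zero or more.
type ConfigLoaderOption func(*LoadOptions)

func LoadOptionDebug(debug bool) ConfigLoaderOption {
	return func(o *LoadOptions) { o.debug = debug }
}

func LoadOptionThreads(threads int) ConfigLoaderOption {
	return func(o *LoadOptions) { o.threads = threads }
}

// Apply runs each option in order; adding a new option later does not
// break existing call sites, which is why later patches can extend the
// loader without touching every caller.
func (o *LoadOptions) Apply(opts ...ConfigLoaderOption) {
	for _, opt := range opts {
		opt(o)
	}
}
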
return errors.New("model not found") } - c.Threads = threads + c.Threads = &threads defer ml.StopAllGRPC() From 5c5f07c1e7d9fd2216096939e257777092667883 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 13 Mar 2024 10:05:46 +0100 Subject: [PATCH 0132/2895] :arrow_up: Update ggerganov/llama.cpp (#1821) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4e6d61fc..d4f61d3b 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=7ab7b733bb48250b2df26c12b00256ef42c76932 +CPPLLAMA_VERSION?=306d34be7ad19e768975409fc80791a274ea0230 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 57222497ecd93eac5e0268af6dd4bc4901c9cba7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 13 Mar 2024 17:57:45 +0100 Subject: [PATCH 0133/2895] fix(docker-compose): update docker compose file (#1824) Signed-off-by: Ettore Di Giacinto --- .env | 2 +- docker-compose.yaml | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/.env b/.env index a31e1e37..82a64e3d 100644 --- a/.env +++ b/.env @@ -18,7 +18,7 @@ ## Default path for models # -MODELS_PATH=/models +# MODELS_PATH=/models ## Enable debug mode # DEBUG=true diff --git a/docker-compose.yaml b/docker-compose.yaml index b24d3c04..b6384c52 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -2,15 +2,30 @@ version: '3.6' services: api: - image: quay.io/go-skynet/local-ai:latest + # See https://localai.io/basics/getting_started/#container-images for + # a list of available container images (or build your own with the provided Dockerfile) + # Available images with CUDA, ROCm, SYCL + # Image list (quay.io): https://quay.io/repository/go-skynet/local-ai?tab=tags + # Image list (dockerhub): https://hub.docker.com/r/localai/localai + image: quay.io/go-skynet/local-ai:master-ffmpeg-core build: context: . 
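
The change from value fields to pointers in PredictionOptions (TopP *float64, Seed *int, and friends) is what makes the input.TopP != nil checks above meaningful: with plain value types, a client that explicitly asks for 0 is indistinguishable from one that omitted the field, so zero values could never override a model default. It is also why main.go now assigns c.Threads = &threads. A standalone illustration (not project code):

package main

import (
	"encoding/json"
	"fmt"
)

type Options struct {
	// A pointer stays nil when the JSON key is absent, so "omitted"
	// and "explicitly zero" are distinguishable after unmarshalling.
	TopP *float64 `json:"top_p"`
}

func main() {
	var omitted, zero Options
	json.Unmarshal([]byte(`{}`), &omitted)
	json.Unmarshal([]byte(`{"top_p": 0}`), &zero)

	fmt.Println(omitted.TopP == nil) // true  -> keep the configured default
	fmt.Println(zero.TopP == nil)    // false -> the caller really wants 0
}
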
dockerfile: Dockerfile + args: + - IMAGE_TYPE=core + - BASE_IMAGE=ubuntu:22.04 ports: - 8080:8080 env_file: - .env + environment: + - MODELS_PATH=/models + # - DEBUG=true volumes: - ./models:/models:cached - ./images/:/tmp/generated/images/ - command: ["/usr/bin/local-ai" ] + command: + # Here we can specify a list of models to run (see quickstart https://localai.io/basics/getting_started/#running-models ) + # or an URL pointing to a YAML configuration file, for example: + # - https://gist.githubusercontent.com/mudler/ad601a0488b497b69ec549150d9edd18/raw/a8a8869ef1bb7e3830bf5c0bae29a0cce991ff8d/phi-2.yaml + - phi-2 From b9e77d394b1cc96f578e1ad5f7dae8978351bfb1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 13 Mar 2024 21:50:46 +0100 Subject: [PATCH 0134/2895] feat(model-help): display help text in markdown (#1825) Signed-off-by: Ettore Di Giacinto --- core/config/application_config.go | 1 - core/config/backend_config.go | 24 +++++++++++++++++--- go.mod | 12 ++++++++++ go.sum | 37 +++++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+), 4 deletions(-) diff --git a/core/config/application_config.go b/core/config/application_config.go index d90ae906..f25b4348 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -158,7 +158,6 @@ func WithBackendAssets(f embed.FS) AppOption { func WithStringGalleries(galls string) AppOption { return func(o *ApplicationConfig) { if galls == "" { - log.Debug().Msgf("no galleries to load") o.Galleries = []gallery.Gallery{} return } diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 53326b3f..2adfeee5 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -15,6 +15,8 @@ import ( "github.com/go-skynet/LocalAI/pkg/utils" "github.com/rs/zerolog/log" "gopkg.in/yaml.v3" + + "github.com/charmbracelet/glamour" ) type BackendConfig struct { @@ -465,6 +467,20 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error { log.Info().Msgf("Preloading models from %s", modelPath) + renderMode := "dark" + if os.Getenv("COLOR") != "" { + renderMode = os.Getenv("COLOR") + } + + glamText := func(t string) { + out, err := glamour.Render(t, renderMode) + if err == nil && os.Getenv("NO_COLOR") == "" { + fmt.Println(out) + } else { + fmt.Println(t) + } + } + for i, config := range cl.configs { // Download files and verify their SHA @@ -503,13 +519,15 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error { cl.configs[i] = *c } if cl.configs[i].Name != "" { - log.Info().Msgf("Model name: %s", cl.configs[i].Name) + glamText(fmt.Sprintf("**Model name**: _%s_", cl.configs[i].Name)) } if cl.configs[i].Description != "" { - log.Info().Msgf("Model description: %s", cl.configs[i].Description) + //glamText("**Description**") + glamText(cl.configs[i].Description) } if cl.configs[i].Usage != "" { - log.Info().Msgf("Model usage: \n%s", cl.configs[i].Usage) + //glamText("**Usage**") + glamText(cl.configs[i].Usage) } } return nil diff --git a/go.mod b/go.mod index bbb90838..b218ca41 100644 --- a/go.mod +++ b/go.mod @@ -53,18 +53,28 @@ require ( ) require ( + github.com/alecthomas/chroma v0.10.0 // indirect + github.com/aymanbagabas/go-osc52 v1.0.3 // indirect + github.com/aymerick/douceur v0.2.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/charmbracelet/glamour v0.6.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dlclark/regexp2 v1.8.1 // indirect github.com/dsnet/compress 
v0.0.2-0.20210315054119-f66993602bf5 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/golang/snappy v0.0.2 // indirect + github.com/gorilla/css v1.0.0 // indirect github.com/klauspost/pgzip v1.2.5 // indirect + github.com/lucasb-eyer/go-colorful v1.2.0 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect + github.com/microcosm-cc/bluemonday v1.0.26 // indirect github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect + github.com/muesli/reflow v0.3.0 // indirect + github.com/muesli/termenv v0.13.0 // indirect github.com/nwaples/rardecode v1.1.0 // indirect + github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/pierrec/lz4/v4 v4.1.2 // indirect github.com/pkoukk/tiktoken-go v0.1.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect @@ -73,6 +83,8 @@ require ( github.com/prometheus/procfs v0.11.1 // indirect github.com/ulikunitz/xz v0.5.9 // indirect github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect + github.com/yuin/goldmark v1.5.2 // indirect + github.com/yuin/goldmark-emoji v1.0.1 // indirect go.opentelemetry.io/otel/sdk v1.19.0 // indirect go.opentelemetry.io/otel/trace v1.19.0 // indirect golang.org/x/term v0.13.0 // indirect diff --git a/go.sum b/go.sum index 84aba3a0..a3ecade2 100644 --- a/go.sum +++ b/go.sum @@ -1,12 +1,20 @@ github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf h1:UgjXLcE9I+VaVz7uBIlzAnyZIXwiDlIiTWqCh159aUI= github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf/go.mod h1:UOf2Mb/deUri5agct5OJ4SLWjhI+kZKbsUVUeRb24I0= +github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek= +github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s= github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= +github.com/aymanbagabas/go-osc52 v1.0.3 h1:DTwqENW7X9arYimJrPeGZcV0ln14sGMt3pHZspWD+Mg= +github.com/aymanbagabas/go-osc52 v1.0.3/go.mod h1:zT8H+Rk4VSabYN90pWyugflM3ZhpTZNC7cASDfUCdT4= +github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk= +github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/charmbracelet/glamour v0.6.0 h1:wi8fse3Y7nfcabbbDuwolqTqMQPMnVPeZhDM273bISc= +github.com/charmbracelet/glamour v0.6.0/go.mod h1:taqWV4swIMMbWALc0m7AfE9JkPSU8om2538k9ITBxOc= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= @@ -16,6 +24,7 @@ github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46t github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 
h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= github.com/dlclark/regexp2 v1.8.1 h1:6Lcdwya6GjPUNsBct8Lg/yRPwMhABj269AAzdGSiR+0= github.com/dlclark/regexp2 v1.8.1/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df h1:qVcBEZlvp5A1gGWNJj02xyDtbsUI2hohlQMSB1fgER4= @@ -78,6 +87,8 @@ github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY= +github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c= github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= @@ -99,6 +110,8 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= +github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= @@ -107,6 +120,8 @@ github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/ github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= +github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= @@ -114,6 +129,11 @@ github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zk github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= github.com/mholt/archiver/v3 v3.5.1 h1:rDjOBX9JSF5BvoJGvjqK479aL70qh9DIpZCl+k7Clwo= github.com/mholt/archiver/v3 v3.5.1/go.mod h1:e3dqJ7H78uzsRSEACH1joayhuSyhnonssnDhppzS1L4= +github.com/microcosm-cc/bluemonday v1.0.21/go.mod h1:ytNkv4RrDrLJ2pqlsSI46O6IVXmZOBBD4SaJyDwwTkM= +github.com/microcosm-cc/bluemonday v1.0.24 
h1:NGQoPtwGVcbGkKfvyYk1yRqknzBuoMiUrO6R7uFTPlw= +github.com/microcosm-cc/bluemonday v1.0.24/go.mod h1:ArQySAMps0790cHSkdPEJ7bGkF2VePWH773hsJNSHf8= +github.com/microcosm-cc/bluemonday v1.0.26 h1:xbqSvqzQMeEHCqMi64VAs4d8uy6Mequs3rQ0k/Khz58= +github.com/microcosm-cc/bluemonday v1.0.26/go.mod h1:JyzOCs9gkyQyjs+6h10UEVSe02CGwkhd72Xdqh78TWs= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 h1:OFVkSxR7CRSRSNm5dvpMRZwmSwWa8EMMnHbc84fW5tU= @@ -122,6 +142,10 @@ github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGw github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c/go.mod h1:gY3wyrhkRySJtmtI/JPt4a2mKv48h/M9pEZIW+SjeC0= github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af h1:XFq6OUqsWQam0OrEr05okXsJK/TQur3zoZTHbiZD3Ks= github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw= +github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s= +github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8= +github.com/muesli/termenv v0.13.0 h1:wK20DRpJdDX8b7Ek2QfhvqhRQFZ237RGRO0RQ/Iqdy0= +github.com/muesli/termenv v0.13.0/go.mod h1:sP1+uffeLaEYpyOTb8pLCUctGcGLnoFjSn4YJK5e2bc= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530 h1:YXMxHwHMB9jCBo2Yu5gz3mTB3T1TnZs/HmPLv15LUSA= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ= @@ -129,6 +153,8 @@ github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWk github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= +github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= +github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= github.com/onsi/ginkgo v1.16.4 h1:29JGrr5oVBm5ulCWet69zQkzWipVXIol6ygQUe/EzNc= @@ -163,6 +189,7 @@ github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdO github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY= github.com/prometheus/procfs v0.11.1 h1:xRC8Iq1yyca5ypa9n1EZnWZkt7dwcoRPQwX/5gwaUuI= github.com/prometheus/procfs v0.11.1/go.mod h1:eesXgaPo1q7lBpVMoMy0ZOFTth9hBn4W/y0/p/ScXhY= +github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= @@ -189,6 +216,7 @@ github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpE github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 
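
Most of the go.mod and go.sum churn in this patch exists only to vendor github.com/charmbracelet/glamour (and its chroma/goldmark/termenv dependency tree) for the glamText helper shown earlier. A minimal sketch of the same idea, independent of the LocalAI code, using glamour's Render(text, style) entry point:

package main

import (
	"fmt"
	"os"

	"github.com/charmbracelet/glamour"
)

// renderMarkdown mirrors the glamText pattern: best-effort ANSI
// rendering of markdown, falling back to the raw text when rendering
// fails or the user opted out via NO_COLOR.
func renderMarkdown(text string) {
	style := "dark"
	if c := os.Getenv("COLOR"); c != "" {
		style = c // glamour ships styles such as "dark", "light", "notty"
	}
	if out, err := glamour.Render(text, style); err == nil && os.Getenv("NO_COLOR") == "" {
		fmt.Println(out)
		return
	}
	fmt.Println(text)
}

func main() {
	renderMarkdown("**Model name**: _example_")
}
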
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= @@ -217,6 +245,10 @@ github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMx github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.5.2 h1:ALmeCk/px5FSm1MAcFBAsVKZjDuMVj8Tm7FFIlMJnqU= +github.com/yuin/goldmark v1.5.2/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/yuin/goldmark-emoji v1.0.1 h1:ctuWEyzGBwiucEqxzwe0SOYDXPAucOrE9NQC18Wa1os= +github.com/yuin/goldmark-emoji v1.0.1/go.mod h1:2w1E6FEWLcDQkoTE+7HU6QF1F6SLlNGjRIBbIZQFqkQ= github.com/yusufpapurcu/wmi v1.2.3 h1:E1ctvB7uKFMOJw3fdOW32DwGE9I7t++CRUEMKvFoFiw= github.com/yusufpapurcu/wmi v1.2.3/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs= @@ -243,6 +275,7 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= +golang.org/x/net v0.0.0-20221002022538-bcab6841153b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -263,6 +296,8 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -275,12 +310,14 @@ golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term 
v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= From b423af001d6629de31e49ff26c50ef13843c99e7 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Thu, 14 Mar 2024 02:39:21 -0500 Subject: [PATCH 0135/2895] fix: the correct BUILD_TYPE for OpenCL is clblas (with no t) (#1828) --- backend/cpp/llama/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/cpp/llama/Makefile b/backend/cpp/llama/Makefile index d6d8ae90..da80bf12 100644 --- a/backend/cpp/llama/Makefile +++ b/backend/cpp/llama/Makefile @@ -12,8 +12,8 @@ ifeq ($(BUILD_TYPE),cublas) # to CMAKE_ARGS automatically else ifeq ($(BUILD_TYPE),openblas) CMAKE_ARGS+=-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -# If build type is clblast (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path -else ifeq ($(BUILD_TYPE),clblast) +# If build type is clblas (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path +else ifeq ($(BUILD_TYPE),clblas) CMAKE_ARGS+=-DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ else ifeq ($(BUILD_TYPE),hipblas) From f82065703d4a832044c2cff980b88c4f30f913a2 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 14 Mar 2024 08:39:39 +0100 Subject: [PATCH 0136/2895] :arrow_up: Update ggerganov/llama.cpp (#1827) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d4f61d3b..bd565db5 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=306d34be7ad19e768975409fc80791a274ea0230 +CPPLLAMA_VERSION?=19885d205e768579ab090d1e99281cae58c21b54 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From a6b540737fa60acc9cd82c33e1fd1d21dd152a55 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Thu, 14 Mar 2024 02:40:37 -0500 Subject: [PATCH 0137/2895] fix: missing OpenCL libraries from docker containers during clblas docker build (#1830) --- .dockerignore | 1 + Dockerfile | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/.dockerignore b/.dockerignore index cf963888..979a26a3 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,3 +3,4 @@ models examples/chatbot-ui/models examples/rwkv/models examples/**/models +Dockerfile \ No newline at end of file diff --git 
a/Dockerfile b/Dockerfile index fd365962..d9354e6d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -105,6 +105,13 @@ COPY . . COPY .git . RUN make prepare +# If we are building with clblas support, we need the libraries for the builds +RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \ + apt-get update && \ + apt-get install -y libclblast-dev && \ + apt-get clean \ + ; fi + # stablediffusion does not tolerate a newer version of abseil, build it first RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build @@ -148,6 +155,13 @@ RUN if [ "${FFMPEG}" = "true" ]; then \ apt-get install -y ffmpeg && apt-get clean \ ; fi +# Add OpenCL +RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \ + apt-get update && \ + apt-get install -y libclblast1 && \ + apt-get clean \ + ; fi + WORKDIR /build # we start fresh & re-copy all assets because `make build` does not clean up nicely after itself From 388213091142624a877754a4f8afe699c8b002e6 Mon Sep 17 00:00:00 2001 From: fakezeta Date: Thu, 14 Mar 2024 23:06:30 +0100 Subject: [PATCH 0138/2895] feat: Add Bitsandbytes quantization for transformer backend enhancement #1775 and fix: Transformer backend error on CUDA #1774 (#1823) * fixes #1775 and #1774 Add BitsAndBytes Quantization and fixes embedding on CUDA devices * Manage 4bit and 8 bit quantization Manage different BitsAndBytes options with the quantization: parameter in yaml * fix compilation errors on non CUDA environment --- .../transformers/transformers-nvidia.yml | 1 + .../transformers/transformers_server.py | 71 +++++++++++++------ 2 files changed, 49 insertions(+), 23 deletions(-) diff --git a/backend/python/common-env/transformers/transformers-nvidia.yml b/backend/python/common-env/transformers/transformers-nvidia.yml index f851677e..7daafe51 100644 --- a/backend/python/common-env/transformers/transformers-nvidia.yml +++ b/backend/python/common-env/transformers/transformers-nvidia.yml @@ -30,6 +30,7 @@ dependencies: - async-timeout==4.0.3 - attrs==23.1.0 - bark==0.1.5 + - bitsandbytes==0.43.0 - boto3==1.28.61 - botocore==1.31.61 - certifi==2023.7.22 diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index 41112c44..264e7fad 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -23,7 +23,7 @@ if XPU: from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM from transformers import AutoTokenizer, AutoModel, set_seed else: - from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed + from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -75,18 +75,50 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): A Result object that contains the result of the LoadModel operation. 
""" model_name = request.Model + + compute = "auto" + if request.F16Memory == True: + compute=torch.bfloat16 + + self.CUDA = request.CUDA + + device_map="cpu" + + quantization = None + + if self.CUDA: + if request.Device: + device_map=request.Device + else: + device_map="cuda:0" + if request.Quantization == "bnb_4bit": + quantization = BitsAndBytesConfig( + load_in_4bit = True, + bnb_4bit_compute_dtype = compute, + bnb_4bit_quant_type = "nf4", + bnb_4bit_use_double_quant = True, + load_in_8bit = False, + ) + elif request.Quantization == "bnb_8bit": + quantization = BitsAndBytesConfig( + load_in_4bit=False, + bnb_4bit_compute_dtype = None, + load_in_8bit=True, + ) + + try: if request.Type == "AutoModelForCausalLM": if XPU: + if quantization == "xpu_4bit": + xpu_4bit = True self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode, - device_map="xpu", load_in_4bit=True) + device_map="xpu", load_in_4bit=xpu_4bit) else: - self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode) + self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode, use_safetensors=True, quantization_config=quantization, device_map=device_map, torch_dtype=compute) else: - self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode) - - self.tokenizer = AutoTokenizer.from_pretrained(model_name) - self.CUDA = False + self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode, use_safetensors=True, quantization_config=quantization, device_map=device_map, torch_dtype=compute) + self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True) self.XPU = False if XPU: @@ -97,13 +129,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): except Exception as err: print("Not using XPU:", err, file=sys.stderr) - if request.CUDA or torch.cuda.is_available(): - try: - print("Loading model", model_name, "to CUDA.", file=sys.stderr) - self.model = self.model.to("cuda") - self.CUDA = True - except Exception as err: - print("Not using CUDA:", err, file=sys.stderr) except Exception as err: return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") # Implement your logic here for the LoadModel service @@ -130,13 +155,17 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): encoded_input = self.tokenizer(request.Embeddings, padding=True, truncation=True, max_length=max_length, return_tensors="pt") # Create word embeddings - model_output = self.model(**encoded_input) + if self.CUDA: + encoded_input = encoded_input.to("cuda") + + with torch.no_grad(): + model_output = self.model(**encoded_input) # Pool to get sentence embeddings; i.e. 
generate one 1024 vector for the entire sentence - sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']).detach().numpy() + sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']) print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr) print("Embeddings:", sentence_embeddings, file=sys.stderr) - return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings) + return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings[0]) def Predict(self, request, context): """ @@ -163,12 +192,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if XPU: inputs = inputs.to("xpu") - outputs = self.model.generate(inputs,max_new_tokens=max_tokens, temperature=request.Temperature, top_p=request.TopP) - - generated_text = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0] - # Remove prompt from response if present - if request.Prompt in generated_text: - generated_text = generated_text.replace(request.Prompt, "") + outputs = self.model.generate(inputs,max_new_tokens=max_tokens, temperature=request.Temperature, top_p=request.TopP, do_sample=True, pad_token_id=self.tokenizer.eos_token_id) + generated_text = self.tokenizer.batch_decode(outputs[:, inputs.shape[1]:], skip_special_tokens=True)[0] return backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8')) From 45d520f91367e32ed0b46c5e3fef821b20c4de5e Mon Sep 17 00:00:00 2001 From: Dave Date: Thu, 14 Mar 2024 18:07:47 -0400 Subject: [PATCH 0139/2895] fix: OSX Build Files for llama.cpp (#1836) bot ate my changes, seperate branch --- Makefile | 4 ++-- backend/cpp/llama/Makefile | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index bd565db5..fcebcadc 100644 --- a/Makefile +++ b/Makefile @@ -463,7 +463,7 @@ backend-assets/grpc/llama: backend-assets/grpc sources/go-llama/libbinding.a $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./backend/go/llm/llama/ # TODO: every binary should have its own folder instead, so can have different implementations ifeq ($(BUILD_TYPE),metal) - cp backend/cpp/llama/llama.cpp/ggml-metal.metal backend-assets/grpc/ + cp backend/cpp/llama/llama.cpp/ggml-common.h backend-assets/grpc/ endif ## BACKEND CPP LLAMA START @@ -494,7 +494,7 @@ backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp # TODO: every binary should have its own folder instead, so can have different metal implementations ifeq ($(BUILD_TYPE),metal) - cp backend/cpp/llama/llama.cpp/build/bin/ggml-metal.metal backend-assets/grpc/ + cp backend/cpp/llama/llama.cpp/build/bin/ggml-common.h backend-assets/grpc/ endif backend-assets/grpc/llama-ggml: backend-assets/grpc sources/go-llama-ggml/libbinding.a diff --git a/backend/cpp/llama/Makefile b/backend/cpp/llama/Makefile index da80bf12..3fa84a4a 100644 --- a/backend/cpp/llama/Makefile +++ b/backend/cpp/llama/Makefile @@ -18,6 +18,9 @@ else ifeq ($(BUILD_TYPE),clblas) # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ else ifeq ($(BUILD_TYPE),hipblas) CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON +# If it's OSX, embed the metal library for fewer moving parts. 
+else ifeq ($(BUILD_TYPE),metal) + CMAKE_ARGS+=-DLLAMA_METAL_EMBED_LIBRARY=ON endif ifeq ($(BUILD_TYPE),sycl_f16) From 20136ca8b70b2b5e9e7362369638f4335ec1c73a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 14 Mar 2024 23:08:34 +0100 Subject: [PATCH 0140/2895] feat(tts): add Elevenlabs and OpenAI TTS compatibility layer (#1834) * feat(elevenlabs): map elevenlabs API support to TTS This allows elevenlabs Clients to work automatically with LocalAI by supporting the elevenlabs API. The elevenlabs server endpoint is implemented such as it is wired to the TTS endpoints. Fixes: https://github.com/mudler/LocalAI/issues/1809 * feat(openai/tts): compat layer with openai tts Fixes: #1276 * fix: adapt tts CLI --- backend/backend.proto | 1 + backend/python/autogptq/backend_pb2.py | 38 ++--- backend/python/bark/backend_pb2.py | 38 ++--- backend/python/coqui/backend_pb2.py | 38 ++--- backend/python/diffusers/backend_pb2.py | 38 ++--- backend/python/exllama/backend_pb2.py | 38 ++--- backend/python/exllama2/backend_pb2.py | 38 ++--- backend/python/mamba/backend_pb2.py | 38 ++--- backend/python/petals/backend_pb2.py | 38 ++--- .../sentencetransformers/backend_pb2.py | 38 ++--- .../transformers-musicgen/backend_pb2.py | 38 ++--- backend/python/transformers/backend_pb2.py | 38 ++--- backend/python/vall-e-x/backend_pb2.py | 38 ++--- backend/python/vllm/backend_pb2.py | 38 ++--- core/backend/tts.go | 22 ++- core/http/api.go | 31 +++- core/http/endpoints/elevenlabs/tts.go | 55 ++++++ core/http/endpoints/localai/tts.go | 2 +- core/schema/elevenlabs.go | 6 + core/schema/localai.go | 1 + docs/content/docs/features/text-to-audio.md | 8 +- main.go | 10 +- pkg/grpc/proto/backend.pb.go | 160 ++++++++++-------- pkg/grpc/proto/backend_grpc.pb.go | 2 +- 24 files changed, 454 insertions(+), 338 deletions(-) create mode 100644 core/http/endpoints/elevenlabs/tts.go create mode 100644 core/schema/elevenlabs.go diff --git a/backend/backend.proto b/backend/backend.proto index a82db555..30e2f8b2 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -191,6 +191,7 @@ message TTSRequest { string text = 1; string model = 2; string dst = 3; + string voice = 4; } message TokenizationResponse { diff --git a/backend/python/autogptq/backend_pb2.py b/backend/python/autogptq/backend_pb2.py index b101e4f4..08b896c7 100644 --- a/backend/python/autogptq/backend_pb2.py +++ b/backend/python/autogptq/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
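
The one-line proto change above (string voice = 4 in TTSRequest) is the hinge of this commit: it lets the new Elevenlabs- and OpenAI-compatible endpoints forward a voice selection to whichever TTS backend serves the request, and the regenerated Python stubs that dominate the rest of the diff merely pick that field up for every backend. On the Go side, building the gRPC request might look roughly like this (a hedged sketch against the regenerated pkg/grpc/proto types; the model file name and voice are illustrative values):

package main

import (
	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)

// buildTTSRequest shows the shape of the updated message: the new
// Voice field (tag 4) rides alongside the existing Text, Model and
// Dst fields.
func buildTTSRequest(text, model, dst, voice string) *pb.TTSRequest {
	return &pb.TTSRequest{
		Text:  text,
		Model: model,
		Dst:   dst,
		Voice: voice, // empty keeps the backend's default voice
	}
}

func main() {
	_ = buildTTSRequest("Hello from LocalAI", "voice-model.onnx", "/tmp/out.wav", "amy")
}
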
# source: backend.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - 
_globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None + _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -45,17 +45,17 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2548 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604 - _globals['_MEMORYUSAGEDATA']._serialized_start=2607 - _globals['_MEMORYUSAGEDATA']._serialized_end=2749 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749 - _globals['_STATUSRESPONSE']._serialized_start=2752 - _globals['_STATUSRESPONSE']._serialized_end=2925 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2858 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2925 - _globals['_BACKEND']._serialized_start=2928 - _globals['_BACKEND']._serialized_end=3556 + _globals['_TTSREQUEST']._serialized_end=2563 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 + _globals['_MEMORYUSAGEDATA']._serialized_start=2622 + _globals['_MEMORYUSAGEDATA']._serialized_end=2764 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 + _globals['_STATUSRESPONSE']._serialized_start=2767 + _globals['_STATUSRESPONSE']._serialized_end=2940 + _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 + _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 + _globals['_BACKEND']._serialized_start=2943 + _globals['_BACKEND']._serialized_end=3571 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/bark/backend_pb2.py b/backend/python/bark/backend_pb2.py index b101e4f4..08b896c7 100644 --- a/backend/python/bark/backend_pb2.py +++ b/backend/python/bark/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: backend.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - 
_globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+
+  DESCRIPTOR._options = None
+  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
   _globals['_HEALTHMESSAGE']._serialized_start=26
   _globals['_HEALTHMESSAGE']._serialized_end=41
   _globals['_PREDICTOPTIONS']._serialized_start=44
@@ -45,17 +45,17 @@ if _descriptor._USE_C_DESCRIPTORS == False:
   _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
   _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
   _globals['_TTSREQUEST']._serialized_start=2494
-  _globals['_TTSREQUEST']._serialized_end=2548
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2607
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2749
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749
-  _globals['_STATUSRESPONSE']._serialized_start=2752
-  _globals['_STATUSRESPONSE']._serialized_end=2925
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2858
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2925
-  _globals['_BACKEND']._serialized_start=2928
-  _globals['_BACKEND']._serialized_end=3556
+  _globals['_TTSREQUEST']._serialized_end=2563
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2622
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2764
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
+  _globals['_STATUSRESPONSE']._serialized_start=2767
+  _globals['_STATUSRESPONSE']._serialized_end=2940
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
+  _globals['_BACKEND']._serialized_start=2943
+  _globals['_BACKEND']._serialized_end=3571
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/coqui/backend_pb2.py b/backend/python/coqui/backend_pb2.py
index b101e4f4..08b896c7 100644
--- a/backend/python/coqui/backend_pb2.py
+++ b/backend/python/coqui/backend_pb2.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - 
_globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+
+  DESCRIPTOR._options = None
+  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
   _globals['_HEALTHMESSAGE']._serialized_start=26
   _globals['_HEALTHMESSAGE']._serialized_end=41
   _globals['_PREDICTOPTIONS']._serialized_start=44
@@ -45,17 +45,17 @@ if _descriptor._USE_C_DESCRIPTORS == False:
   _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
   _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
   _globals['_TTSREQUEST']._serialized_start=2494
-  _globals['_TTSREQUEST']._serialized_end=2548
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2607
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2749
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749
-  _globals['_STATUSRESPONSE']._serialized_start=2752
-  _globals['_STATUSRESPONSE']._serialized_end=2925
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2858
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2925
-  _globals['_BACKEND']._serialized_start=2928
-  _globals['_BACKEND']._serialized_end=3556
+  _globals['_TTSREQUEST']._serialized_end=2563
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2622
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2764
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
+  _globals['_STATUSRESPONSE']._serialized_start=2767
+  _globals['_STATUSRESPONSE']._serialized_end=2940
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
+  _globals['_BACKEND']._serialized_start=2943
+  _globals['_BACKEND']._serialized_end=3571
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/diffusers/backend_pb2.py b/backend/python/diffusers/backend_pb2.py
index b101e4f4..08b896c7 100644
--- a/backend/python/diffusers/backend_pb2.py
+++ b/backend/python/diffusers/backend_pb2.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - 
_globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+
+  DESCRIPTOR._options = None
+  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
+  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
   _globals['_HEALTHMESSAGE']._serialized_start=26
   _globals['_HEALTHMESSAGE']._serialized_end=41
   _globals['_PREDICTOPTIONS']._serialized_start=44
@@ -45,17 +45,17 @@ if _descriptor._USE_C_DESCRIPTORS == False:
   _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
   _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
   _globals['_TTSREQUEST']._serialized_start=2494
-  _globals['_TTSREQUEST']._serialized_end=2548
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2607
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2749
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749
-  _globals['_STATUSRESPONSE']._serialized_start=2752
-  _globals['_STATUSRESPONSE']._serialized_end=2925
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2858
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2925
-  _globals['_BACKEND']._serialized_start=2928
-  _globals['_BACKEND']._serialized_end=3556
+  _globals['_TTSREQUEST']._serialized_end=2563
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
+  _globals['_MEMORYUSAGEDATA']._serialized_start=2622
+  _globals['_MEMORYUSAGEDATA']._serialized_end=2764
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
+  _globals['_STATUSRESPONSE']._serialized_start=2767
+  _globals['_STATUSRESPONSE']._serialized_end=2940
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
+  _globals['_BACKEND']._serialized_start=2943
+  _globals['_BACKEND']._serialized_end=3571
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/exllama/backend_pb2.py b/backend/python/exllama/backend_pb2.py
index b101e4f4..08b896c7 100644
--- a/backend/python/exllama/backend_pb2.py
+++ b/backend/python/exllama/backend_pb2.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - 
_globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None + _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -45,17 +45,17 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2548 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604 - _globals['_MEMORYUSAGEDATA']._serialized_start=2607 - _globals['_MEMORYUSAGEDATA']._serialized_end=2749 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749 - _globals['_STATUSRESPONSE']._serialized_start=2752 - _globals['_STATUSRESPONSE']._serialized_end=2925 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2858 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2925 - _globals['_BACKEND']._serialized_start=2928 - _globals['_BACKEND']._serialized_end=3556 + _globals['_TTSREQUEST']._serialized_end=2563 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 + _globals['_MEMORYUSAGEDATA']._serialized_start=2622 + _globals['_MEMORYUSAGEDATA']._serialized_end=2764 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 + _globals['_STATUSRESPONSE']._serialized_start=2767 + _globals['_STATUSRESPONSE']._serialized_end=2940 + _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 + _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 + _globals['_BACKEND']._serialized_start=2943 + _globals['_BACKEND']._serialized_end=3571 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/exllama2/backend_pb2.py b/backend/python/exllama2/backend_pb2.py index b101e4f4..08b896c7 100644 --- a/backend/python/exllama2/backend_pb2.py +++ b/backend/python/exllama2/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: backend.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - 
_globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None + _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -45,17 +45,17 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2548 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604 - _globals['_MEMORYUSAGEDATA']._serialized_start=2607 - _globals['_MEMORYUSAGEDATA']._serialized_end=2749 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749 - _globals['_STATUSRESPONSE']._serialized_start=2752 - _globals['_STATUSRESPONSE']._serialized_end=2925 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2858 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2925 - _globals['_BACKEND']._serialized_start=2928 - _globals['_BACKEND']._serialized_end=3556 + _globals['_TTSREQUEST']._serialized_end=2563 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 + _globals['_MEMORYUSAGEDATA']._serialized_start=2622 + _globals['_MEMORYUSAGEDATA']._serialized_end=2764 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 + _globals['_STATUSRESPONSE']._serialized_start=2767 + _globals['_STATUSRESPONSE']._serialized_end=2940 + _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 + _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 + _globals['_BACKEND']._serialized_start=2943 + _globals['_BACKEND']._serialized_end=3571 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/mamba/backend_pb2.py b/backend/python/mamba/backend_pb2.py index b101e4f4..08b896c7 100644 --- a/backend/python/mamba/backend_pb2.py +++ b/backend/python/mamba/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: backend.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - 
_globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None + _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -45,17 +45,17 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2548 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604 - _globals['_MEMORYUSAGEDATA']._serialized_start=2607 - _globals['_MEMORYUSAGEDATA']._serialized_end=2749 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749 - _globals['_STATUSRESPONSE']._serialized_start=2752 - _globals['_STATUSRESPONSE']._serialized_end=2925 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2858 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2925 - _globals['_BACKEND']._serialized_start=2928 - _globals['_BACKEND']._serialized_end=3556 + _globals['_TTSREQUEST']._serialized_end=2563 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 + _globals['_MEMORYUSAGEDATA']._serialized_start=2622 + _globals['_MEMORYUSAGEDATA']._serialized_end=2764 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 + _globals['_STATUSRESPONSE']._serialized_start=2767 + _globals['_STATUSRESPONSE']._serialized_end=2940 + _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 + _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 + _globals['_BACKEND']._serialized_start=2943 + _globals['_BACKEND']._serialized_end=3571 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/petals/backend_pb2.py b/backend/python/petals/backend_pb2.py index b101e4f4..08b896c7 100644 --- a/backend/python/petals/backend_pb2.py +++ b/backend/python/petals/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: backend.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - 
_globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None + _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -45,17 +45,17 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2548 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604 - _globals['_MEMORYUSAGEDATA']._serialized_start=2607 - _globals['_MEMORYUSAGEDATA']._serialized_end=2749 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749 - _globals['_STATUSRESPONSE']._serialized_start=2752 - _globals['_STATUSRESPONSE']._serialized_end=2925 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2858 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2925 - _globals['_BACKEND']._serialized_start=2928 - _globals['_BACKEND']._serialized_end=3556 + _globals['_TTSREQUEST']._serialized_end=2563 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 + _globals['_MEMORYUSAGEDATA']._serialized_start=2622 + _globals['_MEMORYUSAGEDATA']._serialized_end=2764 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 + _globals['_STATUSRESPONSE']._serialized_start=2767 + _globals['_STATUSRESPONSE']._serialized_end=2940 + _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 + _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 + _globals['_BACKEND']._serialized_start=2943 + _globals['_BACKEND']._serialized_end=3571 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/sentencetransformers/backend_pb2.py b/backend/python/sentencetransformers/backend_pb2.py index b101e4f4..08b896c7 100644 --- a/backend/python/sentencetransformers/backend_pb2.py +++ b/backend/python/sentencetransformers/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: backend.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - 
_globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None + _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -45,17 +45,17 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2548 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604 - _globals['_MEMORYUSAGEDATA']._serialized_start=2607 - _globals['_MEMORYUSAGEDATA']._serialized_end=2749 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749 - _globals['_STATUSRESPONSE']._serialized_start=2752 - _globals['_STATUSRESPONSE']._serialized_end=2925 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2858 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2925 - _globals['_BACKEND']._serialized_start=2928 - _globals['_BACKEND']._serialized_end=3556 + _globals['_TTSREQUEST']._serialized_end=2563 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 + _globals['_MEMORYUSAGEDATA']._serialized_start=2622 + _globals['_MEMORYUSAGEDATA']._serialized_end=2764 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 + _globals['_STATUSRESPONSE']._serialized_start=2767 + _globals['_STATUSRESPONSE']._serialized_end=2940 + _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 + _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 + _globals['_BACKEND']._serialized_start=2943 + _globals['_BACKEND']._serialized_end=3571 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/transformers-musicgen/backend_pb2.py b/backend/python/transformers-musicgen/backend_pb2.py index b101e4f4..08b896c7 100644 --- a/backend/python/transformers-musicgen/backend_pb2.py +++ b/backend/python/transformers-musicgen/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: backend.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - 
_globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None + _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -45,17 +45,17 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2548 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604 - _globals['_MEMORYUSAGEDATA']._serialized_start=2607 - _globals['_MEMORYUSAGEDATA']._serialized_end=2749 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749 - _globals['_STATUSRESPONSE']._serialized_start=2752 - _globals['_STATUSRESPONSE']._serialized_end=2925 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2858 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2925 - _globals['_BACKEND']._serialized_start=2928 - _globals['_BACKEND']._serialized_end=3556 + _globals['_TTSREQUEST']._serialized_end=2563 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 + _globals['_MEMORYUSAGEDATA']._serialized_start=2622 + _globals['_MEMORYUSAGEDATA']._serialized_end=2764 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 + _globals['_STATUSRESPONSE']._serialized_start=2767 + _globals['_STATUSRESPONSE']._serialized_end=2940 + _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 + _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 + _globals['_BACKEND']._serialized_start=2943 + _globals['_BACKEND']._serialized_end=3571 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/transformers/backend_pb2.py b/backend/python/transformers/backend_pb2.py index b101e4f4..08b896c7 100644 --- a/backend/python/transformers/backend_pb2.py +++ b/backend/python/transformers/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: backend.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - 
_globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None + _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -45,17 +45,17 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2548 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604 - _globals['_MEMORYUSAGEDATA']._serialized_start=2607 - _globals['_MEMORYUSAGEDATA']._serialized_end=2749 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749 - _globals['_STATUSRESPONSE']._serialized_start=2752 - _globals['_STATUSRESPONSE']._serialized_end=2925 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2858 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2925 - _globals['_BACKEND']._serialized_start=2928 - _globals['_BACKEND']._serialized_end=3556 + _globals['_TTSREQUEST']._serialized_end=2563 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 + _globals['_MEMORYUSAGEDATA']._serialized_start=2622 + _globals['_MEMORYUSAGEDATA']._serialized_end=2764 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 + _globals['_STATUSRESPONSE']._serialized_start=2767 + _globals['_STATUSRESPONSE']._serialized_end=2940 + _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 + _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 + _globals['_BACKEND']._serialized_start=2943 + _globals['_BACKEND']._serialized_end=3571 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/vall-e-x/backend_pb2.py b/backend/python/vall-e-x/backend_pb2.py index b101e4f4..08b896c7 100644 --- a/backend/python/vall-e-x/backend_pb2.py +++ b/backend/python/vall-e-x/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: backend.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - 
_globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None + _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -45,17 +45,17 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2548 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604 - _globals['_MEMORYUSAGEDATA']._serialized_start=2607 - _globals['_MEMORYUSAGEDATA']._serialized_end=2749 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749 - _globals['_STATUSRESPONSE']._serialized_start=2752 - _globals['_STATUSRESPONSE']._serialized_end=2925 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2858 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2925 - _globals['_BACKEND']._serialized_start=2928 - _globals['_BACKEND']._serialized_end=3556 + _globals['_TTSREQUEST']._serialized_end=2563 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 + _globals['_MEMORYUSAGEDATA']._serialized_start=2622 + _globals['_MEMORYUSAGEDATA']._serialized_end=2764 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 + _globals['_STATUSRESPONSE']._serialized_start=2767 + _globals['_STATUSRESPONSE']._serialized_end=2940 + _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 + _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 + _globals['_BACKEND']._serialized_start=2943 + _globals['_BACKEND']._serialized_end=3571 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/vllm/backend_pb2.py b/backend/python/vllm/backend_pb2.py index b101e4f4..08b896c7 100644 --- a/backend/python/vllm/backend_pb2.py +++ b/backend/python/vllm/backend_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: backend.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -14,16 +13,17 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - 
_globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None + _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' _globals['_HEALTHMESSAGE']._serialized_start=26 _globals['_HEALTHMESSAGE']._serialized_end=41 _globals['_PREDICTOPTIONS']._serialized_start=44 @@ -45,17 +45,17 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2548 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2550 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2604 - _globals['_MEMORYUSAGEDATA']._serialized_start=2607 - _globals['_MEMORYUSAGEDATA']._serialized_end=2749 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2701 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2749 - _globals['_STATUSRESPONSE']._serialized_start=2752 - _globals['_STATUSRESPONSE']._serialized_end=2925 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2858 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2925 - _globals['_BACKEND']._serialized_start=2928 - _globals['_BACKEND']._serialized_end=3556 + _globals['_TTSREQUEST']._serialized_end=2563 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 + _globals['_MEMORYUSAGEDATA']._serialized_start=2622 + _globals['_MEMORYUSAGEDATA']._serialized_end=2764 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 + _globals['_STATUSRESPONSE']._serialized_start=2767 + _globals['_STATUSRESPONSE']._serialized_end=2940 + _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 + _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 + _globals['_BACKEND']._serialized_start=2943 + _globals['_BACKEND']._serialized_end=3571 # @@protoc_insertion_point(module_scope) diff --git a/core/backend/tts.go b/core/backend/tts.go index 85aa3457..f97b6202 100644 --- a/core/backend/tts.go +++ b/core/backend/tts.go @@ -29,7 +29,7 @@ func generateUniqueFileName(dir, baseName, ext string) string { } } -func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (string, *proto.Result, error) { +func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (string, *proto.Result, error) { bb := backend if bb == "" { bb = model.PiperBackend @@ -44,12 +44,12 @@ func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, appCon model.WithAssetDir(appConfig.AssetsDestination), model.WithLoadGRPCLoadModelOpts(grpcOpts), }) - piperModel, err := loader.BackendLoader(opts...) + ttsModel, err := loader.BackendLoader(opts...) 
if err != nil { return "", nil, err } - if piperModel == nil { + if ttsModel == nil { return "", nil, fmt.Errorf("could not load piper model") } @@ -57,25 +57,31 @@ func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, appCon return "", nil, fmt.Errorf("failed creating audio directory: %s", err) } - fileName := generateUniqueFileName(appConfig.AudioDir, "piper", ".wav") + fileName := generateUniqueFileName(appConfig.AudioDir, "tts", ".wav") filePath := filepath.Join(appConfig.AudioDir, fileName) // If the model file is not empty, we pass it joined with the model path modelPath := "" if modelFile != "" { - if bb != model.TransformersMusicGen { - modelPath = filepath.Join(loader.ModelPath, modelFile) - if err := utils.VerifyPath(modelPath, appConfig.ModelPath); err != nil { + // If the model file is not empty, we pass it joined with the model path + // Checking first that it exists and is not outside ModelPath + // TODO: we should actually first check if the modelFile is looking like + // a FS path + mp := filepath.Join(loader.ModelPath, modelFile) + if _, err := os.Stat(mp); err == nil { + if err := utils.VerifyPath(mp, appConfig.ModelPath); err != nil { return "", nil, err } + modelPath = mp } else { modelPath = modelFile } } - res, err := piperModel.TTS(context.Background(), &proto.TTSRequest{ + res, err := ttsModel.TTS(context.Background(), &proto.TTSRequest{ Text: text, Model: modelPath, + Voice: voice, Dst: filePath, }) diff --git a/core/http/api.go b/core/http/api.go index e2646a14..8578b89e 100644 --- a/core/http/api.go +++ b/core/http/api.go @@ -6,6 +6,7 @@ import ( "os" "strings" + "github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs" "github.com/go-skynet/LocalAI/core/http/endpoints/localai" "github.com/go-skynet/LocalAI/core/http/endpoints/openai" @@ -21,6 +22,24 @@ import ( "github.com/gofiber/fiber/v2/middleware/recover" ) +func readAuthHeader(c *fiber.Ctx) string { + authHeader := c.Get("Authorization") + + // elevenlabs + xApiKey := c.Get("xi-api-key") + if xApiKey != "" { + authHeader = "Bearer " + xApiKey + } + + // anthropic + xApiKey = c.Get("x-api-key") + if xApiKey != "" { + authHeader = "Bearer " + xApiKey + } + + return authHeader +} + func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) { // Return errors as JSON responses app := fiber.New(fiber.Config{ @@ -94,10 +113,12 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi return c.Next() } - authHeader := c.Get("Authorization") + authHeader := readAuthHeader(c) if authHeader == "" { return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"}) } + + // If it's a bearer token authHeaderParts := strings.Split(authHeader, " ") if len(authHeaderParts) != 2 || authHeaderParts[0] != "Bearer" { return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid Authorization header format"}) @@ -111,7 +132,6 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi } return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"}) - } if appConfig.CORS { @@ -147,6 +167,11 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint()) app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint()) + app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig)) + + // Elevenlabs + 
app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig)) + // openAI compatible API endpoint // chat @@ -181,7 +206,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi // audio app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig)) - app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig)) + app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig)) // images app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig)) diff --git a/core/http/endpoints/elevenlabs/tts.go b/core/http/endpoints/elevenlabs/tts.go new file mode 100644 index 00000000..b70c8de4 --- /dev/null +++ b/core/http/endpoints/elevenlabs/tts.go @@ -0,0 +1,55 @@ +package elevenlabs + +import ( + "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/pkg/model" + + "github.com/go-skynet/LocalAI/core/schema" + "github.com/gofiber/fiber/v2" + "github.com/rs/zerolog/log" +) + +func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + + input := new(schema.ElevenLabsTTSRequest) + voiceID := c.Params("voice-id") + + // Get input data from the request body + if err := c.BodyParser(input); err != nil { + return err + } + + modelFile, err := fiberContext.ModelFromContext(c, ml, input.ModelID, false) + if err != nil { + modelFile = input.ModelID + log.Warn().Msgf("Model not found in context: %s", input.ModelID) + } + + cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, + config.LoadOptionDebug(appConfig.Debug), + config.LoadOptionThreads(appConfig.Threads), + config.LoadOptionContextSize(appConfig.ContextSize), + config.LoadOptionF16(appConfig.F16), + ) + if err != nil { + modelFile = input.ModelID + log.Warn().Msgf("Model not found in context: %s", input.ModelID) + } else { + if input.ModelID != "" { + modelFile = input.ModelID + } else { + modelFile = cfg.Model + } + } + log.Debug().Msgf("Request for model: %s", modelFile) + + filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, voiceID, ml, appConfig, *cfg) + if err != nil { + return err + } + return c.Download(filePath) + } +} diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go index 9c3f890d..508a29ab 100644 --- a/core/http/endpoints/localai/tts.go +++ b/core/http/endpoints/localai/tts.go @@ -46,7 +46,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi cfg.Backend = input.Backend } - filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, ml, appConfig, *cfg) + filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, input.Voice, ml, appConfig, *cfg) if err != nil { return err } diff --git a/core/schema/elevenlabs.go b/core/schema/elevenlabs.go new file mode 100644 index 00000000..8bd6be3b --- /dev/null +++ b/core/schema/elevenlabs.go @@ -0,0 +1,6 @@ +package schema + +type ElevenLabsTTSRequest struct { + Text string `json:"text" yaml:"text"` + ModelID string `json:"model_id" yaml:"model_id"` +} diff --git a/core/schema/localai.go b/core/schema/localai.go index 115183a3..5f5fd41e 100644 --- a/core/schema/localai.go +++ b/core/schema/localai.go @@ -17,5 +17,6 @@ type BackendMonitorResponse struct { type TTSRequest struct { Model string `json:"model" 
yaml:"model"`
	Input string `json:"input" yaml:"input"`
+	Voice string `json:"voice" yaml:"voice"`
	Backend string `json:"backend" yaml:"backend"`
}
diff --git a/docs/content/docs/features/text-to-audio.md b/docs/content/docs/features/text-to-audio.md
index 57b783ee..f53407a1 100644
--- a/docs/content/docs/features/text-to-audio.md
+++ b/docs/content/docs/features/text-to-audio.md
@@ -6,7 +6,13 @@ weight = 11
 url = "/features/text-to-audio/"
 +++
 
-The `/tts` endpoint can be used to generate speech from text.
+## API Compatibility
+
+The LocalAI TTS API is compatible with the [OpenAI TTS API](https://platform.openai.com/docs/guides/text-to-speech) and the [Elevenlabs](https://api.elevenlabs.io/docs) API.
+
+## LocalAI API
+
+The `/tts` endpoint can also be used to generate speech from text.
 
 ## Usage
diff --git a/main.go b/main.go
index 21560e5a..169c3400 100644
--- a/main.go
+++ b/main.go
@@ -50,7 +50,7 @@ func main() {
 	app := &cli.App{
 		Name:    "LocalAI",
 		Version: internal.PrintableVersion(),
-		Usage:   "OpenAI compatible API for running LLaMA/GPT models locally on CPU with consumer grade hardware.",
+		Usage:   "OpenAI, OSS alternative. Drop-in compatible API for running LLM, GPT and genAI models locally on CPU, GPUs with consumer grade hardware. Supported server endpoints: OpenAI, Elevenlabs",
 		Flags: []cli.Flag{
 			&cli.BoolFlag{
 				Name: "f16",
@@ -394,6 +394,12 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
 				Usage:    "Model name to run the TTS",
 				Required: true,
 			},
+			&cli.StringFlag{
+				Name:     "voice",
+				Aliases:  []string{"v"},
+				Usage:    "Voice name to run the TTS (optional)",
+				Required: false,
+			},
 			&cli.StringFlag{
 				Name:    "output-file",
 				Aliases: []string{"o"},
@@ -427,7 +433,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
 
 			defer ml.StopAllGRPC()
 
-			filePath, _, err := backend.ModelTTS(backendOption, text, modelOption, ml, opts, config.BackendConfig{})
+			filePath, _, err := backend.ModelTTS(backendOption, text, modelOption, ctx.String("voice"), ml, opts, config.BackendConfig{})
 			if err != nil {
 				return err
 			}
diff --git a/pkg/grpc/proto/backend.pb.go b/pkg/grpc/proto/backend.pb.go
index 1268f2cc..48551499 100644
--- a/pkg/grpc/proto/backend.pb.go
+++ b/pkg/grpc/proto/backend.pb.go
@@ -1,7 +1,7 @@
 // Code generated by protoc-gen-go. DO NOT EDIT.
// versions: // protoc-gen-go v1.26.0 -// protoc v4.25.3 +// protoc v4.23.4 // source: backend.proto package proto @@ -1436,6 +1436,7 @@ type TTSRequest struct { Text string `protobuf:"bytes,1,opt,name=text,proto3" json:"text,omitempty"` Model string `protobuf:"bytes,2,opt,name=model,proto3" json:"model,omitempty"` Dst string `protobuf:"bytes,3,opt,name=dst,proto3" json:"dst,omitempty"` + Voice string `protobuf:"bytes,4,opt,name=voice,proto3" json:"voice,omitempty"` } func (x *TTSRequest) Reset() { @@ -1491,6 +1492,13 @@ func (x *TTSRequest) GetDst() string { return "" } +func (x *TTSRequest) GetVoice() string { + if x != nil { + return x.Voice + } + return "" +} + type TokenizationResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1896,84 +1904,86 @@ var file_backend_proto_rawDesc = []byte{ 0x28, 0x09, 0x52, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, - 0x22, 0x48, 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, + 0x22, 0x5e, 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, - 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x22, 0x46, 0x0a, 0x14, 0x54, 0x6f, - 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, - 0x73, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, - 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, - 0x6e, 0x73, 0x22, 0xac, 0x01, 0x0a, 0x0f, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, - 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x45, 0x0a, 0x09, - 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, - 0x27, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, - 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x2e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, - 0x6f, 0x77, 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, - 0x6f, 0x77, 0x6e, 0x1a, 0x3c, 0x0a, 0x0e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, - 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, - 0x01, 0x22, 0xbc, 0x01, 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, - 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x0e, 0x32, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x53, 
0x74, 0x61, - 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x30, 0x0a, 0x06, 0x6d, 0x65, 0x6d, - 0x6f, 0x72, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, - 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, - 0x61, 0x74, 0x61, 0x52, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x22, 0x43, 0x0a, 0x05, 0x53, - 0x74, 0x61, 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, - 0x4c, 0x49, 0x5a, 0x45, 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, - 0x01, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, 0x10, 0x02, 0x12, 0x12, 0x0a, 0x05, - 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, - 0x32, 0xf4, 0x04, 0x0a, 0x07, 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, - 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, - 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0e, - 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, - 0x12, 0x34, 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, - 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, - 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x35, 0x0a, 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, - 0x64, 0x65, 0x6c, 0x12, 0x15, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, - 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, - 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, - 0x0d, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, + 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x6f, + 0x69, 0x63, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, + 0x22, 0x46, 0x0a, 0x14, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, + 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, + 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, + 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, + 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xac, 0x01, 0x0a, 0x0f, 0x4d, 0x65, 0x6d, + 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, + 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, + 0x61, 0x6c, 0x12, 0x45, 0x0a, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x18, + 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, + 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x2e, + 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, + 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x1a, 0x3c, 0x0a, 0x0e, 0x42, 0x72, 0x65, + 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, + 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, + 0x05, 0x76, 
0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x76, 0x61, + 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xbc, 0x01, 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, + 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, + 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, + 0x30, 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, + 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x52, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, + 0x79, 0x22, 0x43, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, + 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, 0x5a, 0x45, 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, + 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x01, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, + 0x10, 0x02, 0x12, 0x12, 0x0a, 0x05, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x32, 0xf4, 0x04, 0x0a, 0x07, 0x42, 0x61, 0x63, 0x6b, 0x65, + 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x12, 0x16, 0x2e, 0x62, + 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, + 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, + 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, + 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, + 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, + 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x35, 0x0a, 0x09, + 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x15, 0x2e, 0x62, 0x61, 0x63, 0x6b, + 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, + 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, + 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, + 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, + 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x30, + 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, - 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, - 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x30, 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, - 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, - 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, - 0x0d, 0x47, 0x65, 0x6e, 0x65, 0x72, 
0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, - 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, - 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, - 0x12, 0x4d, 0x0a, 0x12, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, - 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, - 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, - 0x73, 0x74, 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, - 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, - 0x2d, 0x0a, 0x03, 0x54, 0x54, 0x53, 0x12, 0x13, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, - 0x2e, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, - 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4a, - 0x0a, 0x0e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, - 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, - 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, - 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x3b, 0x0a, 0x06, 0x53, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, - 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x17, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x42, 0x5a, 0x0a, 0x19, 0x69, 0x6f, 0x2e, 0x73, 0x6b, - 0x79, 0x6e, 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, 0x62, 0x61, 0x63, - 0x6b, 0x65, 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x42, 0x61, 0x63, - 0x6b, 0x65, 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, - 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, - 0x61, 0x6c, 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, + 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, + 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, + 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x47, + 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, + 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, 0x0a, 0x12, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x54, + 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x2e, 0x62, + 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, + 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 
0x65, + 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, + 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x03, 0x54, 0x54, 0x53, 0x12, 0x13, 0x2e, 0x62, + 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, + 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, + 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4a, 0x0a, 0x0e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, + 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, + 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, + 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, + 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, + 0x12, 0x3b, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, + 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, + 0x67, 0x65, 0x1a, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, + 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x42, 0x5a, 0x0a, + 0x19, 0x69, 0x6f, 0x2e, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, + 0x61, 0x69, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, + 0x6c, 0x41, 0x49, 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, + 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, + 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, + 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x33, } var ( diff --git a/pkg/grpc/proto/backend_grpc.pb.go b/pkg/grpc/proto/backend_grpc.pb.go index 89552e36..ef5187bc 100644 --- a/pkg/grpc/proto/backend_grpc.pb.go +++ b/pkg/grpc/proto/backend_grpc.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: // - protoc-gen-go-grpc v1.2.0 -// - protoc v4.25.3 +// - protoc v4.23.4 // source: backend.proto package proto From 44adbd2c759a87f2dc61f1bf7b8850a38b57b1c4 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 15 Mar 2024 00:06:42 +0100 Subject: [PATCH 0141/2895] :arrow_up: Update go-skynet/go-llama.cpp (#1835) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fcebcadc..5b8f1cbe 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ GOVET=$(GOCMD) vet BINARY_NAME=local-ai # llama.cpp versions -GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 +GOLLAMA_VERSION?=6a8041ef6b46d4712afc3ae791d1c2d73da0ad1c GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 From db199f61dae720e1ebe3156829366040a9a4d7cc Mon Sep 17 00:00:00 2001 From: Dave Date: Fri, 15 Mar 2024 04:18:58 -0400 Subject: [PATCH 0142/2895] fix: osx build default.metallib (#1837) fix: osx build default.metallib (#1837) * port osx fix from refactor pr to slim pr * manually bump llama.cpp version to unstick CI? 
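For context, a build sketch (assuming LocalAI's documented `make` flow; not part of this patch): on macOS the backend is built with `BUILD_TYPE=metal`, and after the change below the prebuilt `default.metallib` is copied next to the gRPC binary instead of being embedded at compile time.

```bash
# sketch: build LocalAI on macOS with the Metal backend
make BUILD_TYPE=metal build
```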
---
 Makefile                   | 7 ++-----
 backend/cpp/llama/Makefile | 4 +---
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/Makefile b/Makefile
index 5b8f1cbe..1a008611 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ GOLLAMA_VERSION?=6a8041ef6b46d4712afc3ae791d1c2d73da0ad1c
 
 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
 
-CPPLLAMA_VERSION?=19885d205e768579ab090d1e99281cae58c21b54
+CPPLLAMA_VERSION?=4755afd1cbd40d93c017e5b98c39796f52345314
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -462,9 +462,6 @@ backend-assets/grpc/llama: backend-assets/grpc sources/go-llama/libbinding.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama LIBRARY_PATH=$(CURDIR)/sources/go-llama \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./backend/go/llm/llama/
 # TODO: every binary should have its own folder instead, so can have different implementations
-ifeq ($(BUILD_TYPE),metal)
-	cp backend/cpp/llama/llama.cpp/ggml-common.h backend-assets/grpc/
-endif
 
 ## BACKEND CPP LLAMA START
 # Sets the variables in case it has to build the gRPC locally.
@@ -494,7 +491,7 @@ backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
 	cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
 # TODO: every binary should have its own folder instead, so can have different metal implementations
 ifeq ($(BUILD_TYPE),metal)
-	cp backend/cpp/llama/llama.cpp/build/bin/ggml-common.h backend-assets/grpc/
+	cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/
 endif
 
 backend-assets/grpc/llama-ggml: backend-assets/grpc sources/go-llama-ggml/libbinding.a
diff --git a/backend/cpp/llama/Makefile b/backend/cpp/llama/Makefile
index 3fa84a4a..f2e17a9d 100644
--- a/backend/cpp/llama/Makefile
+++ b/backend/cpp/llama/Makefile
@@ -18,9 +18,7 @@ else ifeq ($(BUILD_TYPE),clblas)
 # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
 else ifeq ($(BUILD_TYPE),hipblas)
 	CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
-# If it's OSX, embed the metal library for fewer moving parts.
-else ifeq ($(BUILD_TYPE),metal)
-	CMAKE_ARGS+=-DLLAMA_METAL_EMBED_LIBRARY=ON
+# If it's OSX, DO NOT embed the metal library - -DLLAMA_METAL_EMBED_LIBRARY=ON requires further investigation
 endif
 
 ifeq ($(BUILD_TYPE),sycl_f16)

From ae2e4fc2fedcde377d4e32a838554eac648df85e Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 15 Mar 2024 18:13:30 +0100
Subject: [PATCH 0143/2895] docs(transformers): add docs section about transformers (#1841)

---
 docs/content/docs/features/text-generation.md | 53 +++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/docs/content/docs/features/text-generation.md b/docs/content/docs/features/text-generation.md
index 121f90a8..1d0e1e9e 100644
--- a/docs/content/docs/features/text-generation.md
+++ b/docs/content/docs/features/text-generation.md
@@ -272,3 +272,56 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
    "temperature": 0.1, "top_p": 0.1
  }'
 ```
+
+### Transformers
+
+[Transformers](https://huggingface.co/docs/transformers/index) is a state-of-the-art machine learning library for PyTorch, TensorFlow, and JAX.
+
+LocalAI has a built-in integration with Transformers, and it can be used to run models.
+
+This is an extra backend: it is already available in the container images (the `extra` images already include the Python dependencies for Transformers), so there is nothing to do for the setup.
+ +#### Setup + +Create a YAML file for the model you want to use with `transformers`. + +To set up a model, you just need to specify the model name in the YAML config file: +```yaml +name: transformers +backend: transformers +parameters: + model: "facebook/opt-125m" +type: AutoModelForCausalLM +quantization: bnb_4bit # One of: bnb_8bit, bnb_4bit, xpu_4bit (optional) +``` + +The backend will automatically download the required files in order to run the model. + +#### Parameters + +##### Type + +| Type | Description | +| --- | --- | +| `AutoModelForCausalLM` | `AutoModelForCausalLM` is a model that can be used to generate sequences. | +| N/A | Defaults to `AutoModel` | + + +##### Quantization + +| Quantization | Description | +| --- | --- | +| `bnb_8bit` | 8-bit quantization | +| `bnb_4bit` | 4-bit quantization | +| `xpu_4bit` | 4-bit quantization for Intel XPUs | + +#### Usage + +Use the `completions` endpoint by specifying the `transformers` model: +``` +curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{ + "model": "transformers", + "prompt": "Hello, my name is", + "temperature": 0.1, "top_p": 0.1 + }' +``` \ No newline at end of file From 89351f1a7d020b808ef20093a75db7bd0587ad36 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 15 Mar 2024 18:14:23 +0100 Subject: [PATCH 0144/2895] feat(embeddings): do not require to be configured (#1842) Certain engines require knowing during model loading whether the embedding feature has to be enabled; however, it is impractical to have to set it for ALL the backends that support embeddings. Backends such as transformers and sentencetransformers seamlessly handle both cases without this setting being explicitly enabled. The case subsists only for ggml-based models, which need to enable feature sets during model loading (and thus require the `embedding` setting); most of the other engines do not require this. This change disables the check done on the code side, making it easier to use embeddings by not having to explicitly specify `embeddings: true`.
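As an illustration, a config sketch (the model name `my-embedder` is a placeholder; backend and model mirror the bundled examples) that serves the embeddings endpoint without the explicit flag:

```
# write a model config that omits `embeddings: true`
cat > models/my-embedder.yaml <<'EOF'
name: my-embedder
backend: sentencetransformers
parameters:
  model: all-MiniLM-L6-v2
EOF
# query the embeddings endpoint as usual
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" \
  -d '{"input": "Your text string goes here", "model": "my-embedder"}'
```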
Part of: https://github.com/mudler/LocalAI/issues/1373 --- core/backend/embeddings.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/core/backend/embeddings.go b/core/backend/embeddings.go index 94310854..03ff90b9 100644 --- a/core/backend/embeddings.go +++ b/core/backend/embeddings.go @@ -10,10 +10,6 @@ import ( ) func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) { - if !backendConfig.Embeddings { - return nil, fmt.Errorf("endpoint disabled for this model by API configuration") - } - modelFile := backendConfig.Model grpcOpts := gRPCModelOpts(backendConfig) From 5826fb8e6d996300a9f6d4542db6f59adabdaa2f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 16 Mar 2024 00:51:03 +0100 Subject: [PATCH 0145/2895] :arrow_up: Update mudler/go-piper (#1844) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1a008611..6d309152 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ WHISPER_CPP_VERSION?=37a709f6558c6d9783199e2b8cbb136e1c41d346 BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d # go-piper version -PIPER_VERSION?=d6b6275ba037dabdba4a8b65dfdf6b2a73a67f07 +PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759 # stablediffusion version STABLEDIFFUSION_VERSION?=362df9da29f882dbf09ade61972d16a1f53c3485 From 8967ed1601b1f73a17d0255768024ccb6de709ce Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 16 Mar 2024 12:25:41 +0100 Subject: [PATCH 0146/2895] :arrow_up: Update ggerganov/llama.cpp (#1840) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6d309152..2bd83fc1 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=6a8041ef6b46d4712afc3ae791d1c2d73da0ad1c GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=4755afd1cbd40d93c017e5b98c39796f52345314 +CPPLLAMA_VERSION?=d84c48505f60bcd358b82a751d40418c4d235643 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 801b481beb5b235eaf9f390155422b9db733fd40 Mon Sep 17 00:00:00 2001 From: Chakib Benziane Date: Sun, 17 Mar 2024 09:43:20 +0100 Subject: [PATCH 0147/2895] fixes #1051: handle openai presence and request penalty parameters (#1817) * fix request debugging, disable marshalling of context fields Signed-off-by: blob42 * merge frequency_penalty request param with config Signed-off-by: blob42 * openai: add presence_penalty parameter Signed-off-by: blob42 --------- Signed-off-by: blob42 --- core/http/endpoints/openai/request.go | 8 ++++++++ core/schema/openai.go | 2 +- core/schema/prediction.go | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go index 505244c4..1f845c6f 100644 --- a/core/http/endpoints/openai/request.go +++ b/core/http/endpoints/openai/request.go @@ -185,6 +185,14 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque config.RepeatPenalty = input.RepeatPenalty } + if input.FrequencyPenalty != 0 { + config.FrequencyPenalty = input.FrequencyPenalty + } + + if input.PresencePenalty != 0 { + config.PresencePenalty = input.PresencePenalty + } + if input.Keep != 0 { config.Keep = input.Keep } diff --git
a/core/schema/openai.go b/core/schema/openai.go index 1c13847c..6aa0f1b0 100644 --- a/core/schema/openai.go +++ b/core/schema/openai.go @@ -108,7 +108,7 @@ type ChatCompletionResponseFormat struct { type OpenAIRequest struct { PredictionOptions - Context context.Context `json:"-"` + Context context.Context `json:"-"` Cancel context.CancelFunc `json:"-"` // whisper diff --git a/core/schema/prediction.go b/core/schema/prediction.go index d75e5eb8..4933f2d2 100644 --- a/core/schema/prediction.go +++ b/core/schema/prediction.go @@ -25,6 +25,7 @@ type PredictionOptions struct { Keep int `json:"n_keep" yaml:"n_keep"` FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"` + PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"` TFZ float64 `json:"tfz" yaml:"tfz"` TypicalP float64 `json:"typical_p" yaml:"typical_p"` From 020ce29cd84fa34de359c51dc6a824b1a86a7d02 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Sun, 17 Mar 2024 09:39:20 -0500 Subject: [PATCH 0148/2895] fix(make): allow to parallelize jobs (#1845) * fix: clean up Makefile dependencies to allow for parallel builds * refactor: remove old unused backend from Makefile * fix: finish removing legacy backend, update piper * fix: I broke llama... I fixed llama * feat: give the tests and builds a few threads * fix: ensure libraries are replaced before build, add dropreplace target * Fix image build workflows --- .dockerignore | 2 +- .github/workflows/image-pr.yml | 2 + .github/workflows/image.yml | 2 + .github/workflows/image_build.yml | 6 + Dockerfile | 39 +++--- Makefile | 222 ++++++++++++++---------------- backend/cpp/grpc/Makefile | 2 +- backend/cpp/llama/Makefile | 2 +- 8 files changed, 139 insertions(+), 138 deletions(-) diff --git a/.dockerignore b/.dockerignore index 979a26a3..97e8aa34 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,4 +3,4 @@ models examples/chatbot-ui/models examples/rwkv/models examples/**/models -Dockerfile \ No newline at end of file +Dockerfile* \ No newline at end of file diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 2e9a0afe..17456617 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -22,6 +22,7 @@ jobs: platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} + makeflags: "-j3" secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -80,6 +81,7 @@ jobs: platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} + makeflags: "-j3" secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 2a7fac27..5ba0f1bf 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -26,6 +26,7 @@ jobs: platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} + makeflags: "-j3" secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -199,6 +200,7 @@ jobs: platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} + makeflags: "-j3" secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index a45473b4..a978f1bf 
100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -46,6 +46,11 @@ on: required: true default: '' type: string + makeflags: + description: 'Make Flags' + required: false + default: '' + type: string secrets: dockerUsername: required: true @@ -160,6 +165,7 @@ jobs: FFMPEG=${{ inputs.ffmpeg }} IMAGE_TYPE=${{ inputs.image-type }} BASE_IMAGE=${{ inputs.base-image }} + MAKEFLAGS=${{ inputs.makeflags }} context: . file: ./Dockerfile platforms: ${{ inputs.platforms }} diff --git a/Dockerfile b/Dockerfile index d9354e6d..ebda80ba 100644 --- a/Dockerfile +++ b/Dockerfile @@ -63,7 +63,9 @@ WORKDIR /build RUN test -n "$TARGETARCH" \ || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`') -# Extras requirements +################################### +################################### + FROM requirements-core as requirements-extras RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ @@ -93,8 +95,11 @@ FROM requirements-${IMAGE_TYPE} as builder ARG GO_TAGS="stablediffusion tts" ARG GRPC_BACKENDS ARG BUILD_GRPC=true +ARG MAKEFLAGS + ENV GRPC_BACKENDS=${GRPC_BACKENDS} ENV GO_TAGS=${GO_TAGS} +ENV MAKEFLAGS=${MAKEFLAGS} ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0" ENV NVIDIA_VISIBLE_DEVICES=all @@ -116,10 +121,10 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \ RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build RUN if [ "${BUILD_GRPC}" = "true" ]; then \ - git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ + git clone --recurse-submodules --jobs 4 -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \ -DgRPC_BUILD_TESTS=OFF \ - ../.. && make -j12 install \ + ../.. 
&& make install \ ; fi # Rebuild with defaults backends @@ -139,10 +144,12 @@ ARG FFMPEG ARG BUILD_TYPE ARG TARGETARCH ARG IMAGE_TYPE=extras +ARG MAKEFLAGS ENV BUILD_TYPE=${BUILD_TYPE} ENV REBUILD=false ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz +ENV MAKEFLAGS=${MAKEFLAGS} ARG CUDA_MAJOR_VERSION=11 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility @@ -186,43 +193,43 @@ COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/ ## Duplicated from Makefile to avoid having a big layer that's hard to push RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - make -C backend/python/autogptq \ + make -C backend/python/autogptq \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - make -C backend/python/bark \ + make -C backend/python/bark \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - make -C backend/python/diffusers \ + make -C backend/python/diffusers \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - make -C backend/python/vllm \ + make -C backend/python/vllm \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - make -C backend/python/mamba \ + make -C backend/python/mamba \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - make -C backend/python/sentencetransformers \ + make -C backend/python/sentencetransformers \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - make -C backend/python/transformers \ + make -C backend/python/transformers \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - make -C backend/python/vall-e-x \ + make -C backend/python/vall-e-x \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - make -C backend/python/exllama \ + make -C backend/python/exllama \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - make -C backend/python/exllama2 \ + make -C backend/python/exllama2 \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - make -C backend/python/petals \ + make -C backend/python/petals \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - make -C backend/python/transformers-musicgen \ + make -C backend/python/transformers-musicgen \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - make -C backend/python/coqui \ + make -C backend/python/coqui \ ; fi # Make sure the models directory exists diff --git a/Makefile b/Makefile index 2bd83fc1..4449e501 100644 --- a/Makefile +++ b/Makefile @@ -4,10 +4,7 @@ GOVET=$(GOCMD) vet BINARY_NAME=local-ai # llama.cpp versions -GOLLAMA_VERSION?=6a8041ef6b46d4712afc3ae791d1c2d73da0ad1c - -GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 - +GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be CPPLLAMA_VERSION?=d84c48505f60bcd358b82a751d40418c4d235643 # gpt4all version @@ -148,7 +145,6 @@ endif ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings -ALL_GRPC_BACKENDS+=backend-assets/grpc/llama ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all @@ -168,40 +164,41 @@ ifeq ($(BUILD_API_ONLY),true) GRPC_BACKENDS= endif -.PHONY: all test build vendor +.PHONY: all test build vendor get-sources prepare-sources prepare all: help -## GPT4ALL -sources/gpt4all: - git clone --recurse-submodules $(GPT4ALL_REPO) sources/gpt4all - cd sources/gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1 - -## go-piper -sources/go-piper: - git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper - cd sources/go-piper && git checkout -b build 
$(PIPER_VERSION) && git submodule update --init --recursive --depth 1 - ## BERT embeddings sources/go-bert: git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert cd sources/go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1 -## stable diffusion -sources/go-stable-diffusion: - git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion sources/go-stable-diffusion - cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1 +sources/go-bert/libgobert.a: sources/go-bert + $(MAKE) -C sources/go-bert libgobert.a -sources/go-stable-diffusion/libstablediffusion.a: - $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a +## go-llama-ggml +sources/go-llama-ggml: + git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml + cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1 -## tiny-dream -sources/go-tiny-dream: - git clone --recurse-submodules https://github.com/M0Rf30/go-tiny-dream sources/go-tiny-dream - cd sources/go-tiny-dream && git checkout -b build $(TINYDREAM_VERSION) && git submodule update --init --recursive --depth 1 +sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml + $(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a -sources/go-tiny-dream/libtinydream.a: - $(MAKE) -C sources/go-tiny-dream libtinydream.a +## go-piper +sources/go-piper: + git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper + cd sources/go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1 + +sources/go-piper/libpiper_binding.a: sources/go-piper + $(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o + +## GPT4ALL +sources/gpt4all: + git clone --recurse-submodules $(GPT4ALL_REPO) sources/gpt4all + cd sources/gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1 + +sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all + $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a ## RWKV sources/go-rwkv: @@ -211,23 +208,23 @@ sources/go-rwkv: sources/go-rwkv/librwkv.a: sources/go-rwkv cd sources/go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. 
-sources/go-bert/libgobert.a: sources/go-bert - $(MAKE) -C sources/go-bert libgobert.a +## stable diffusion +sources/go-stable-diffusion: + git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion sources/go-stable-diffusion + cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1 -backend-assets/gpt4all: sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a - mkdir -p backend-assets/gpt4all - @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true - @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true - @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true +sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion + $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a -backend-assets/espeak-ng-data: sources/go-piper - mkdir -p backend-assets/espeak-ng-data - $(MAKE) -C sources/go-piper piper.o - @cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data +## tiny-dream +sources/go-tiny-dream: + git clone --recurse-submodules https://github.com/M0Rf30/go-tiny-dream sources/go-tiny-dream + cd sources/go-tiny-dream && git checkout -b build $(TINYDREAM_VERSION) && git submodule update --init --recursive --depth 1 -sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all - $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a +sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream + $(MAKE) -C sources/go-tiny-dream libtinydream.a +## whisper sources/whisper.cpp: git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1 @@ -235,47 +232,34 @@ sources/whisper.cpp: sources/whisper.cpp/libwhisper.a: sources/whisper.cpp cd sources/whisper.cpp && make libwhisper.a -sources/go-llama: - git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama - cd sources/go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1 - -sources/go-llama-ggml: - git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml - cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1 - -sources/go-llama/libbinding.a: sources/go-llama - $(MAKE) -C sources/go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a - -sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml - $(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a - -sources/go-piper/libpiper_binding.a: sources/go-piper - $(MAKE) -C sources/go-piper libpiper_binding.a example/main - -backend/cpp/llama/llama.cpp: - LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp - -get-sources: backend/cpp/llama/llama.cpp sources/go-llama sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream - touch $@ +get-sources: sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream replace: - $(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang $(GOCMD) mod edit 
-replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go $(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert - $(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion $(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream $(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper + $(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion + $(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang + +dropreplace: + $(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp + $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp + $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go + $(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp + $(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream + $(GOCMD) mod edit -dropreplace github.com/mudler/go-piper + $(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion + $(GOCMD) mod edit -dropreplace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang prepare-sources: get-sources replace $(GOCMD) mod download - touch $@ ## GENERIC rebuild: ## Rebuilds the project $(GOCMD) clean -cache - $(MAKE) -C sources/go-llama clean $(MAKE) -C sources/go-llama-ggml clean $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean $(MAKE) -C sources/go-rwkv clean @@ -287,7 +271,6 @@ rebuild: ## Rebuilds the project $(MAKE) build prepare: prepare-sources $(OPTIONAL_TARGETS) - touch $@ clean: ## Remove build related file $(GOCMD) clean -cache @@ -298,10 +281,10 @@ clean: ## Remove build related file rm -rf backend-assets $(MAKE) -C backend/cpp/grpc clean $(MAKE) -C backend/cpp/llama clean + $(MAKE) dropreplace ## Build: - -build: backend-assets grpcs prepare ## Build the project +build: prepare backend-assets grpcs ## Build the project $(info ${GREEN}I local-ai build info:${RESET}) $(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET}) $(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET}) @@ -454,39 +437,55 @@ ifeq ($(BUILD_API_ONLY),true) touch backend-assets/keep endif -backend-assets/grpc: +backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_binding.a + mkdir -p backend-assets/espeak-ng-data + @cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. 
backend-assets/espeak-ng-data + +backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a + mkdir -p backend-assets/gpt4all + @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true + @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true + @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true + +backend-assets/grpc: replace mkdir -p backend-assets/grpc -backend-assets/grpc/llama: backend-assets/grpc sources/go-llama/libbinding.a - $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama LIBRARY_PATH=$(CURDIR)/sources/go-llama \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./backend/go/llm/llama/ -# TODO: every binary should have its own folder instead, so can have different implementations +backend-assets/grpc/bert-embeddings: sources/go-bert sources/go-bert/libgobert.a backend-assets/grpc + CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert LIBRARY_PATH=$(CURDIR)/sources/go-bert \ + $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/ + +backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc + CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \ + $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/ + +backend-assets/grpc/langchain-huggingface: backend-assets/grpc + $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./backend/go/llm/langchain/ + +backend/cpp/llama/llama.cpp: + LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp -## BACKEND CPP LLAMA START -# Sets the variables in case it has to build the gRPC locally. 
INSTALLED_PACKAGES=$(CURDIR)/backend/cpp/grpc/installed_packages INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \ - -DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \ - -Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \ - -DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \ - -DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include - + -DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \ + -Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \ + -DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \ + -DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include backend/cpp/llama/grpc-server: +# Conditionally build grpc for the llama backend to use if needed ifdef BUILD_GRPC_FOR_BACKEND_LLAMA $(MAKE) -C backend/cpp/grpc build - export _PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto && \ - export _GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin && \ - export PATH="${INSTALLED_PACKAGES}/bin:${PATH}" && \ - CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server + _PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto \ + _GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin \ + PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \ + CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \ + LLAMA_VERSION=$(CPPLLAMA_VERSION) \ + $(MAKE) -C backend/cpp/llama grpc-server else echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined." LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server endif -## BACKEND CPP LLAMA END -## backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp # TODO: every binary should have its own folder instead, so can have different metal implementations @@ -494,43 +493,28 @@ ifeq ($(BUILD_TYPE),metal) cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/ endif -backend-assets/grpc/llama-ggml: backend-assets/grpc sources/go-llama-ggml/libbinding.a +backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libbinding.a backend-assets/grpc $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/ -backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/ - -backend-assets/grpc/rwkv: backend-assets/grpc sources/go-rwkv/librwkv.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv - -backend-assets/grpc/bert-embeddings: backend-assets/grpc sources/go-bert/libgobert.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert LIBRARY_PATH=$(CURDIR)/sources/go-bert \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/ - 
-backend-assets/grpc/langchain-huggingface: backend-assets/grpc - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./backend/go/llm/langchain/ - -backend-assets/grpc/stablediffusion: backend-assets/grpc - if [ ! -f backend-assets/grpc/stablediffusion ]; then \ - $(MAKE) sources/go-stable-diffusion; \ - $(MAKE) sources/go-stable-diffusion/libstablediffusion.a; \ - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-stable-diffusion/ LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion; \ - fi - -backend-assets/grpc/tinydream: backend-assets/grpc sources/go-tiny-dream/libtinydream.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream - -backend-assets/grpc/piper: backend-assets/grpc backend-assets/espeak-ng-data sources/go-piper/libpiper_binding.a +backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/ -backend-assets/grpc/whisper: backend-assets/grpc sources/whisper.cpp/libwhisper.a +backend-assets/grpc/rwkv: sources/go-rwkv sources/go-rwkv/librwkv.a backend-assets/grpc + CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \ + $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv + +backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc + CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-stable-diffusion/ LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \ + $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion + +backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc + CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \ + $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream + +backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/ diff --git a/backend/cpp/grpc/Makefile b/backend/cpp/grpc/Makefile index a6486113..6a181794 100644 --- a/backend/cpp/grpc/Makefile +++ b/backend/cpp/grpc/Makefile @@ -48,7 +48,7 @@ $(INSTALLED_PACKAGES): grpc_build $(GRPC_REPO): git clone --depth $(GIT_CLONE_DEPTH) -b $(TAG_LIB_GRPC) $(GIT_REPO_LIB_GRPC) $(GRPC_REPO)/grpc - cd $(GRPC_REPO)/grpc && git submodule update --init --recursive --depth $(GIT_CLONE_DEPTH) + cd $(GRPC_REPO)/grpc && git submodule update --jobs 2 --init --recursive --depth $(GIT_CLONE_DEPTH) $(GRPC_BUILD): $(GRPC_REPO) mkdir -p $(GRPC_BUILD) diff --git a/backend/cpp/llama/Makefile b/backend/cpp/llama/Makefile index f2e17a9d..8502ae2f 
100644 --- a/backend/cpp/llama/Makefile +++ b/backend/cpp/llama/Makefile @@ -36,7 +36,7 @@ llama.cpp: fi cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1 -llama.cpp/examples/grpc-server: +llama.cpp/examples/grpc-server: llama.cpp mkdir -p llama.cpp/examples/grpc-server cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/ cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/ From 88b65f63d0afc8b51e26fe6feec65ce9d1cbccc8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 17 Mar 2024 23:08:22 +0100 Subject: [PATCH 0149/2895] fix(go-llama): use llama-cpp as default (#1849) * fix(go-llama): use llama-cpp as default Signed-off-by: Ettore Di Giacinto * fix(backends): drop obsoleted lines --------- Signed-off-by: Ettore Di Giacinto --- pkg/model/initializers.go | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 1e2af8f9..a6a84fd7 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -15,12 +15,11 @@ import ( ) var Aliases map[string]string = map[string]string{ - "go-llama": GoLlamaBackend, + "go-llama": LLamaCPP, "llama": LLamaCPP, } const ( - GoLlamaBackend = "llama" LlamaGGML = "llama-ggml" LLamaCPP = "llama-cpp" Gpt4AllLlamaBackend = "gpt4all-llama" @@ -35,15 +34,11 @@ const ( TinyDreamBackend = "tinydream" PiperBackend = "piper" LCHuggingFaceBackend = "langchain-huggingface" - - // External Backends that need special handling within LocalAI: - TransformersMusicGen = "transformers-musicgen" ) var AutoLoadBackends []string = []string{ LLamaCPP, LlamaGGML, - GoLlamaBackend, Gpt4All, BertEmbeddingsBackend, RwkvBackend, From d2b83d8357f6de6aa5512130785165d24ad92b32 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 17 Mar 2024 23:08:32 +0100 Subject: [PATCH 0150/2895] :arrow_up: Update docs version mudler/LocalAI (#1847) Signed-off-by: GitHub Co-authored-by: mudler --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index 59fd693c..cd7dbcda 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.9.0" + "version": "v2.10.0" } From 0eb0ac7dd0cde42f789f9c4b8da4fe999f58555f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 18 Mar 2024 08:57:58 +0100 Subject: [PATCH 0151/2895] :arrow_up: Update ggerganov/llama.cpp (#1848) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4449e501..f7ce5218 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=d84c48505f60bcd358b82a751d40418c4d235643 +CPPLLAMA_VERSION?=d01b3c4c32357567f3531d4e6ceffc5d23e87583 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From b202bfaaa02fecbe0d2f9c9555c2c5e8eaa86ca8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 18 Mar 2024 15:56:53 +0100 Subject: [PATCH 0152/2895] deps(whisper.cpp): update, fix cublas build (#1846) fix(whisper.cpp): Add stubs and -lcuda --- Makefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index f7ce5218..8bbc0625 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ 
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=37a709f6558c6d9783199e2b8cbb136e1c41d346 +WHISPER_CPP_VERSION?=a56f435fd475afd7edf02bfbf9f8c77f527198c2 # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d @@ -35,6 +35,7 @@ export STABLE_BUILD_TYPE?=$(BUILD_TYPE) export CMAKE_ARGS?= CGO_LDFLAGS?= +CGO_LDFLAGS_WHISPER?= CUDA_LIBPATH?=/usr/local/cuda/lib64/ GO_TAGS?= BUILD_ID?=git @@ -88,10 +89,12 @@ ifeq ($(BUILD_TYPE),openblas) export WHISPER_OPENBLAS=1 endif + ifeq ($(BUILD_TYPE),cublas) CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) export LLAMA_CUBLAS=1 export WHISPER_CUBLAS=1 + CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda endif ifeq ($(BUILD_TYPE),hipblas) @@ -515,7 +518,7 @@ backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libti $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \ + CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/ grpcs: prepare $(GRPC_BACKENDS) From fa9e330fc692e3bee72f724e88d2687dd8ed2cbc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 18 Mar 2024 18:59:24 +0100 Subject: [PATCH 0153/2895] fix(llama.cpp): fix eos without cache (#1852) --- backend/cpp/llama/grpc-server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index c91ce854..a2e39a9c 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -1084,7 +1084,7 @@ struct llama_server_context slot.has_next_token = false; } - if (!slot.cache_tokens.empty() && result.tok == llama_token_eos(model)) + if (result.tok == llama_token_eos(model)) { slot.stopped_eos = true; slot.has_next_token = false; From 843f93e1ab0378dda67b77cb9c80746aba70b4c5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 18 Mar 2024 18:59:39 +0100 Subject: [PATCH 0154/2895] fix(config): default to debug=false if not set (#1853) --- core/config/backend_config.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 2adfeee5..daaf0257 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -276,8 +276,12 @@ func (cfg *BackendConfig) SetDefaults(debug bool, threads, ctx int, f16 bool) { cfg.F16 = &f16 } + if cfg.Debug == nil { + cfg.Debug = &falseV + } + if debug { - cfg.Debug = &debug + cfg.Debug = &trueV } } From a046dcac5eb5d647e21cefb92a189dbd0255518f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 18 Mar 2024 19:14:48 +0100 Subject: [PATCH 0155/2895] fix(config-watcher): start only if config-directory exists (#1854) Signed-off-by: Ettore Di Giacinto --- main.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/main.go b/main.go index 169c3400..400dcb57 100644 --- a/main.go +++ b/main.go @@ -306,11 +306,16 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit return fmt.Errorf("failed 
basic startup tasks with error %s", err.Error()) } - closeConfigWatcherFn, err := startup.WatchConfigDirectory(ctx.String("localai-config-dir"), options) - defer closeConfigWatcherFn() + configdir := ctx.String("localai-config-dir") + // Watch the configuration directory + // If the directory does not exist, we don't watch it + if _, err := os.Stat(configdir); err == nil { + closeConfigWatcherFn, err := startup.WatchConfigDirectory(ctx.String("localai-config-dir"), options) + defer closeConfigWatcherFn() - if err != nil { - return fmt.Errorf("failed while watching configuration directory %s", ctx.String("localai-config-dir")) + if err != nil { + return fmt.Errorf("failed while watching configuration directory %s", ctx.String("localai-config-dir")) + } } appHTTP, err := http.App(cl, ml, options) From ed5734ae25edadb631e9de58d1f10f9c50e18c00 Mon Sep 17 00:00:00 2001 From: Dave Date: Mon, 18 Mar 2024 14:19:43 -0400 Subject: [PATCH 0156/2895] test/fix: OSX Test Repair (#1843) * test with gguf instead of ggml. Updates testPrompt to match? Adds debugging line to Dockerfile that I've found helpful recently. * fix testPrompt slightly * Sad Experiment: Test GH runner without metal? * break apart CGO_LDFLAGS * switch runner * upstream llama.cpp disables Metal on Github CI! * missed a dir from clean-tests * CGO_LDFLAGS * tmate failure + NO_ACCELERATE * whisper.cpp has a metal fix * do the exact opposite of the name of this branch, but keep it around for unrelated fixes? * add back newlines * add tmate to linux for testing * update fixtures * timeout for tmate --- .github/workflows/test.yml | 12 ++++++++++-- Dockerfile | 1 + Makefile | 19 +++++++++++++++---- backend/cpp/llama/Makefile | 5 +++++ core/http/api_test.go | 8 ++++---- tests/models_fixtures/config.yaml | 4 ++-- tests/models_fixtures/gpt4.yaml | 2 +- tests/models_fixtures/gpt4_2.yaml | 2 +- 8 files changed, 39 insertions(+), 14 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2a2cc6c8..8222508a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -105,9 +105,13 @@ jobs: - name: Test run: | GO_TAGS="stablediffusion tts" make test + - name: Setup tmate session if tests fail + if: ${{ failure() }} + uses: mxschmitt/action-tmate@v3 + timeout-minutes: 5 tests-apple: - runs-on: macOS-latest + runs-on: macOS-14 strategy: matrix: go-version: ['1.21.x'] @@ -130,4 +134,8 @@ jobs: run: | export C_INCLUDE_PATH=/usr/local/include export CPLUS_INCLUDE_PATH=/usr/local/include - CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test \ No newline at end of file + BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test + - name: Setup tmate session if tests fail + if: ${{ failure() }} + uses: mxschmitt/action-tmate@v3 + timeout-minutes: 5 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index ebda80ba..b083690e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -108,6 +108,7 @@ WORKDIR /build COPY . . COPY .git . 
+RUN echo "GO_TAGS: $GO_TAGS" RUN make prepare # If we are building with clblas support, we need the libraries for the builds diff --git a/Makefile b/Makefile index 8bbc0625..ff7ec797 100644 --- a/Makefile +++ b/Makefile @@ -70,7 +70,7 @@ UNAME_S := $(shell uname -s) endif ifeq ($(OS),Darwin) - CGO_LDFLAGS += -lcblas -framework Accelerate + ifeq ($(OSX_SIGNING_IDENTITY),) OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/') endif @@ -81,6 +81,12 @@ ifeq ($(OS),Darwin) # disable metal if on Darwin and any other value is explicitly passed. else ifneq ($(BUILD_TYPE),metal) CMAKE_ARGS+=-DLLAMA_METAL=OFF + export LLAMA_NO_ACCELERATE=1 + endif + + ifeq ($(BUILD_TYPE),metal) +# -lcblas removed: it seems to always be listed as a duplicate flag. + CGO_LDFLAGS += -framework Accelerate endif endif @@ -286,6 +292,11 @@ clean: ## Remove build related file $(MAKE) -C backend/cpp/llama clean $(MAKE) dropreplace +clean-tests: + rm -rf test-models + rm -rf test-dir + rm -rf core/http/backend-assets + ## Build: build: prepare backend-assets grpcs ## Build the project $(info ${GREEN}I local-ai build info:${RESET}) @@ -305,10 +316,10 @@ osx-signed: build run: prepare ## run local-ai CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./ -test-models/testmodel: +test-models/testmodel.ggml: mkdir test-models mkdir test-dir - wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel + wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel.ggml wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav @@ -320,7 +331,7 @@ prepare-test: grpcs cp -rf backend-assets core/http cp tests/models_fixtures/* test-models -test: prepare test-models/testmodel grpcs +test: prepare test-models/testmodel.ggml grpcs @echo 'Running tests' export GO_TAGS="tts stablediffusion" $(MAKE) prepare-test diff --git a/backend/cpp/llama/Makefile b/backend/cpp/llama/Makefile index 8502ae2f..3d31284a 100644 --- a/backend/cpp/llama/Makefile +++ b/backend/cpp/llama/Makefile @@ -19,6 +19,11 @@ else ifeq ($(BUILD_TYPE),clblas) else ifeq ($(BUILD_TYPE),hipblas) CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON # If it's OSX, DO NOT embed the metal library - -DLLAMA_METAL_EMBED_LIBRARY=ON requires further investigation +# But if it's OSX without metal, disable it here +else ifeq ($(OS),darwin) + ifneq ($(BUILD_TYPE),metal) + CMAKE_ARGS+=-DLLAMA_METAL=OFF + endif endif ifeq ($(BUILD_TYPE),sycl_f16) diff --git a/core/http/api_test.go b/core/http/api_test.go index b0579a19..ca69e8bf 100644 --- a/core/http/api_test.go +++ b/core/http/api_test.go @@ -666,15 +666,15 @@ var _ = Describe("API test", func() { Expect(err).ToNot(HaveOccurred()) Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8? 
}) - It("can generate completions", func() { - resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: testPrompt}) + It("can generate completions via ggml", func() { + resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Text).ToNot(BeEmpty()) }) - It("can generate chat completions ", func() { - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) + It("can generate chat completions via ggml", func() { + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel.ggml", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) diff --git a/tests/models_fixtures/config.yaml b/tests/models_fixtures/config.yaml index 749d1699..f61c2a7c 100644 --- a/tests/models_fixtures/config.yaml +++ b/tests/models_fixtures/config.yaml @@ -1,6 +1,6 @@ - name: list1 parameters: - model: testmodel + model: testmodel.ggml top_p: 80 top_k: 0.9 temperature: 0.1 @@ -19,7 +19,7 @@ top_p: 80 top_k: 0.9 temperature: 0.1 - model: testmodel + model: testmodel.ggml context_size: 200 stopwords: - "HUMAN:" diff --git a/tests/models_fixtures/gpt4.yaml b/tests/models_fixtures/gpt4.yaml index 652a407c..43e77586 100644 --- a/tests/models_fixtures/gpt4.yaml +++ b/tests/models_fixtures/gpt4.yaml @@ -1,6 +1,6 @@ name: gpt4all parameters: - model: testmodel + model: testmodel.ggml top_p: 80 top_k: 0.9 temperature: 0.1 diff --git a/tests/models_fixtures/gpt4_2.yaml b/tests/models_fixtures/gpt4_2.yaml index 904693ca..8a211153 100644 --- a/tests/models_fixtures/gpt4_2.yaml +++ b/tests/models_fixtures/gpt4_2.yaml @@ -1,6 +1,6 @@ name: gpt4all-2 parameters: - model: testmodel + model: testmodel.ggml top_p: 80 top_k: 0.9 temperature: 0.1 From 621541a92f9c2ef84336a103046dc2ad2b4ef7e3 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 19 Mar 2024 00:44:23 +0100 Subject: [PATCH 0157/2895] :arrow_up: Update ggerganov/whisper.cpp (#1508) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ff7ec797..29d49a80 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=a56f435fd475afd7edf02bfbf9f8c77f527198c2 +WHISPER_CPP_VERSION?=e7794a868ffb53f5299125aaaf74fbcad93cd06c # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From b12a2053200e8f772ea970fc7c99ae9d2c727b37 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 19 Mar 2024 00:44:45 +0100 Subject: [PATCH 0158/2895] :arrow_up: Update docs version mudler/LocalAI (#1856) Signed-off-by: GitHub Co-authored-by: mudler --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index cd7dbcda..20ca21c5 100644 --- a/docs/data/version.json +++ 
b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.10.0" + "version": "v2.10.1" } From ead61bf9d5b6024d2a6a971bbdfd612c8e059aa7 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 19 Mar 2024 01:03:17 +0100 Subject: [PATCH 0159/2895] :arrow_up: Update ggerganov/llama.cpp (#1857) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 29d49a80..f4d85d90 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=d01b3c4c32357567f3531d4e6ceffc5d23e87583 +CPPLLAMA_VERSION?=2d15886bb092c3b780c676b5cc57ff3337af9c83 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From e4bf51d5bd9ff88164492f4518e557b08d59a18f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 20 Mar 2024 09:05:53 +0100 Subject: [PATCH 0160/2895] :arrow_up: Update ggerganov/llama.cpp (#1864) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f4d85d90..fe074592 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=2d15886bb092c3b780c676b5cc57ff3337af9c83 +CPPLLAMA_VERSION?=d8b009a9456bf5284376149f3deb09300a37701a # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 7e34dfdae7298979d0202c6d82bdfae655f82582 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 20 Mar 2024 23:13:29 +0100 Subject: [PATCH 0161/2895] :arrow_up: Update ggerganov/llama.cpp (#1866) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fe074592..653d6beb 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=d8b009a9456bf5284376149f3deb09300a37701a +CPPLLAMA_VERSION?=1c51f98adcbad40e3c41f0a6ffadeb723190b417 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From eeaf8c7ccd18768406b665797f641cc302d91f13 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 20 Mar 2024 23:26:29 +0100 Subject: [PATCH 0162/2895] :arrow_up: Update ggerganov/whisper.cpp (#1867) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 653d6beb..85d6c7c9 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=e7794a868ffb53f5299125aaaf74fbcad93cd06c +WHISPER_CPP_VERSION?=79d5765e7e1a904d976adfd5636da7da43163eb3 # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From e533dcf506398e34833c8e66e7e821ffaedbee45 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 21 Mar 2024 01:12:20 +0100 Subject: [PATCH 0163/2895] feat(functions/aio): all-in-one images, function template enhancements (#1862) * feat(startup): allow to specify models from local files * feat(aio): add Dockerfile, make targets, aio 
profiles * feat(template): add Function and LastMessage * add hermes2-pro-mistral * update hermes2 definition * feat(template): add sprig * feat(template): expose FunctionCall * feat(aio): switch llm for text --- Dockerfile.aio | 9 ++++ Makefile | 14 ++++++ aio/cpu/embeddings.yaml | 13 ++++++ aio/cpu/image-gen.yaml | 53 +++++++++++++++++++++++ aio/cpu/speech-to-text.yaml | 18 ++++++++ aio/cpu/text-to-speech.yaml | 15 +++++++ aio/cpu/text-to-text.yaml | 22 ++++++++++ aio/cpu/vision.yaml | 40 +++++++++++++++++ aio/gpu-8g/embeddings.yaml | 13 ++++++ aio/gpu-8g/image-gen.yaml | 22 ++++++++++ aio/gpu-8g/speech-to-text.yaml | 18 ++++++++ aio/gpu-8g/text-to-speech.yaml | 15 +++++++ aio/gpu-8g/text-to-text.yaml | 51 ++++++++++++++++++++++ aio/gpu-8g/vision.yaml | 40 +++++++++++++++++ core/http/endpoints/openai/chat.go | 3 ++ embedded/models/hermes-2-pro-mistral.yaml | 51 ++++++++++++++++++++++ go.mod | 9 ++++ go.sum | 34 +++++++++++++++ pkg/model/loader.go | 6 ++- pkg/startup/model_preload.go | 18 +++++++- 20 files changed, 462 insertions(+), 2 deletions(-) create mode 100644 Dockerfile.aio create mode 100644 aio/cpu/embeddings.yaml create mode 100644 aio/cpu/image-gen.yaml create mode 100644 aio/cpu/speech-to-text.yaml create mode 100644 aio/cpu/text-to-speech.yaml create mode 100644 aio/cpu/text-to-text.yaml create mode 100644 aio/cpu/vision.yaml create mode 100644 aio/gpu-8g/embeddings.yaml create mode 100644 aio/gpu-8g/image-gen.yaml create mode 100644 aio/gpu-8g/speech-to-text.yaml create mode 100644 aio/gpu-8g/text-to-speech.yaml create mode 100644 aio/gpu-8g/text-to-text.yaml create mode 100644 aio/gpu-8g/vision.yaml create mode 100644 embedded/models/hermes-2-pro-mistral.yaml diff --git a/Dockerfile.aio b/Dockerfile.aio new file mode 100644 index 00000000..4097e6d5 --- /dev/null +++ b/Dockerfile.aio @@ -0,0 +1,9 @@ +ARG BASE_IMAGE=ubuntu:22.04 + +FROM ${BASE_IMAGE} +ARG SIZE=cpu +ENV MODELS="/aio-models/embeddings.yaml,/aio-models/text-to-speech.yaml,/aio-models/image-gen.yaml,/aio-models/text-to-text.yaml,/aio-models/speech-to-text.yaml,/aio-models/vision.yaml" + +COPY aio/${SIZE} /aio-models + +ENTRYPOINT [ "/build/entrypoint.sh" ] \ No newline at end of file diff --git a/Makefile b/Makefile index 85d6c7c9..c03091d0 100644 --- a/Makefile +++ b/Makefile @@ -535,6 +535,8 @@ backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper. grpcs: prepare $(GRPC_BACKENDS) DOCKER_IMAGE?=local-ai +DOCKER_AIO_IMAGE?=local-ai-aio +DOCKER_AIO_SIZE?=cpu IMAGE_TYPE?=core BASE_IMAGE?=ubuntu:22.04 @@ -545,6 +547,18 @@ docker: --build-arg GO_TAGS=$(GO_TAGS) \ --build-arg BUILD_TYPE=$(BUILD_TYPE) \ -t $(DOCKER_IMAGE) . + +docker-aio: + @echo "Building AIO image with size $(DOCKER_AIO_SIZE)" + @echo "Building AIO image with base image $(BASE_IMAGE)" + docker build \ + --build-arg BASE_IMAGE=$(BASE_IMAGE) \ + --build-arg SIZE=$(DOCKER_AIO_SIZE) \ + -t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio . 
+ +docker-aio-all: + $(MAKE) docker-aio DOCKER_AIO_SIZE=cpu + $(MAKE) docker-aio DOCKER_AIO_SIZE=cpu docker-image-intel: docker build \ diff --git a/aio/cpu/embeddings.yaml b/aio/cpu/embeddings.yaml new file mode 100644 index 00000000..512d63a4 --- /dev/null +++ b/aio/cpu/embeddings.yaml @@ -0,0 +1,13 @@ +name: all-minilm-l6-v2 +backend: sentencetransformers +embeddings: true +parameters: + model: all-MiniLM-L6-v2 + +usage: | + You can test this model with curl like this: + + curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{ + "input": "Your text string goes here", + "model": "all-minilm-l6-v2" + }' \ No newline at end of file diff --git a/aio/cpu/image-gen.yaml b/aio/cpu/image-gen.yaml new file mode 100644 index 00000000..3b9c2eec --- /dev/null +++ b/aio/cpu/image-gen.yaml @@ -0,0 +1,53 @@ +name: stablediffusion +backend: stablediffusion +parameters: + model: stablediffusion_assets + +license: "BSD-3" +urls: +- https://github.com/EdVince/Stable-Diffusion-NCNN +- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE + +description: | + Stable Diffusion in NCNN with c++, supported txt2img and img2img + +download_files: +- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param" + sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param" +- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param" + sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param" +- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param" + sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param" +- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin" + sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin" +- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin" + sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin" +- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin" + sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin" +- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param" + sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param" +- filename: "stablediffusion_assets/log_sigmas.bin" + sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin" +- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param" + sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1" + uri: 
"https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param" +- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param" + sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param" +- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param" + sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param" +- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin" + sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin" +- filename: "stablediffusion_assets/vocab.txt" + sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt" \ No newline at end of file diff --git a/aio/cpu/speech-to-text.yaml b/aio/cpu/speech-to-text.yaml new file mode 100644 index 00000000..f7ebd217 --- /dev/null +++ b/aio/cpu/speech-to-text.yaml @@ -0,0 +1,18 @@ +name: whisper +backend: whisper +parameters: + model: ggml-whisper-base.bin + +usage: | + ## example audio file + wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg + + ## Send the example audio file to the transcriptions endpoint + curl http://localhost:8080/v1/audio/transcriptions \ + -H "Content-Type: multipart/form-data" \ + -F file="@$PWD/gb1.ogg" -F model="whisper" + +download_files: +- filename: "ggml-whisper-base.bin" + sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" + uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" \ No newline at end of file diff --git a/aio/cpu/text-to-speech.yaml b/aio/cpu/text-to-speech.yaml new file mode 100644 index 00000000..93c11403 --- /dev/null +++ b/aio/cpu/text-to-speech.yaml @@ -0,0 +1,15 @@ +name: voice-en-us-amy-low +download_files: + - filename: voice-en-us-amy-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz + +parameters: + model: en-us-amy-low.onnx + +usage: | + To test if this model works as expected, you can use the following curl command: + + curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ + "model":"voice-en-us-amy-low", + "input": "Hi, this is a test." 
+  }'
\ No newline at end of file
diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml
new file mode 100644
index 00000000..7558ba9f
--- /dev/null
+++ b/aio/cpu/text-to-text.yaml
@@ -0,0 +1,22 @@
+name: gpt-3.5-turbo
+context_size: 2048
+f16: true
+gpu_layers: 90
+mmap: true
+trimsuffix:
+- "\n"
+parameters:
+  model: huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
+
+template:
+  chat: &template |-
+    Instruct: {{.Input}}
+    Output:
+  completion: *template
+
+usage: |
+  To use this model, interact with the API (in another terminal) with curl for instance:
+  curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+    "model": "gpt-3.5-turbo",
+    "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
+  }'
diff --git a/aio/cpu/vision.yaml b/aio/cpu/vision.yaml
new file mode 100644
index 00000000..3d240681
--- /dev/null
+++ b/aio/cpu/vision.yaml
@@ -0,0 +1,40 @@
+backend: llama-cpp
+context_size: 4096
+f16: true
+
+gpu_layers: 90
+mmap: true
+name: llava
+
+roles:
+  user: "USER:"
+  assistant: "ASSISTANT:"
+  system: "SYSTEM:"
+
+mmproj: bakllava-mmproj.gguf
+parameters:
+  model: bakllava.gguf
+  temperature: 0.2
+  top_k: 40
+  top_p: 0.95
+  seed: -1
+mirostat: 2
+mirostat_eta: 1.0
+mirostat_tau: 1.0
+
+template:
+  chat: |
+    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
+    {{.Input}}
+    ASSISTANT:
+
+download_files:
+- filename: bakllava.gguf
+  uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
+- filename: bakllava-mmproj.gguf
+  uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
+
+usage: |
+  curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+    "model": "llava",
+    "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
diff --git a/aio/gpu-8g/embeddings.yaml b/aio/gpu-8g/embeddings.yaml
new file mode 100644
index 00000000..512d63a4
--- /dev/null
+++ b/aio/gpu-8g/embeddings.yaml
@@ -0,0 +1,13 @@
+name: all-minilm-l6-v2
+backend: sentencetransformers
+embeddings: true
+parameters:
+  model: all-MiniLM-L6-v2
+
+usage: |
+  You can test this model with curl like this:
+
+  curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
+    "input": "Your text string goes here",
+    "model": "all-minilm-l6-v2"
+  }'
\ No newline at end of file
diff --git a/aio/gpu-8g/image-gen.yaml b/aio/gpu-8g/image-gen.yaml
new file mode 100644
index 00000000..3857cd6b
--- /dev/null
+++ b/aio/gpu-8g/image-gen.yaml
@@ -0,0 +1,22 @@
+name: dreamshaper
+parameters:
+  model: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
+backend: diffusers
+step: 25
+f16: true
+cuda: true
+diffusers:
+  pipeline_type: StableDiffusionPipeline
+  cuda: true
+  enable_parameters: "negative_prompt,num_inference_steps"
+  scheduler_type: "k_dpmpp_2m"
+
+usage: |
+  curl http://localhost:8080/v1/images/generations \
+    -H "Content-Type: application/json" \
+    -d '{
+      "prompt": "|",
+      "model": "dreamshaper",
+      "step": 25,
+      "size": "512x512"
+    }'
\ No newline at end of file
diff --git a/aio/gpu-8g/speech-to-text.yaml b/aio/gpu-8g/speech-to-text.yaml
new file mode 100644
index 00000000..f7ebd217
--- /dev/null
+++
b/aio/gpu-8g/speech-to-text.yaml
@@ -0,0 +1,18 @@
+name: whisper
+backend: whisper
+parameters:
+  model: ggml-whisper-base.bin
+
+usage: |
+  ## example audio file
+  wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
+
+  ## Send the example audio file to the transcriptions endpoint
+  curl http://localhost:8080/v1/audio/transcriptions \
+    -H "Content-Type: multipart/form-data" \
+    -F file="@$PWD/gb1.ogg" -F model="whisper"
+
+download_files:
+- filename: "ggml-whisper-base.bin"
+  sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
+  uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
\ No newline at end of file
diff --git a/aio/gpu-8g/text-to-speech.yaml b/aio/gpu-8g/text-to-speech.yaml
new file mode 100644
index 00000000..93c11403
--- /dev/null
+++ b/aio/gpu-8g/text-to-speech.yaml
@@ -0,0 +1,15 @@
+name: voice-en-us-amy-low
+download_files:
+  - filename: voice-en-us-amy-low.tar.gz
+    uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
+
+parameters:
+  model: en-us-amy-low.onnx
+
+usage: |
+  To test if this model works as expected, you can use the following curl command:
+
+  curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
+    "model":"voice-en-us-amy-low",
+    "input": "Hi, this is a test."
+  }'
\ No newline at end of file
diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml
new file mode 100644
index 00000000..d91e057c
--- /dev/null
+++ b/aio/gpu-8g/text-to-text.yaml
@@ -0,0 +1,51 @@
+name: gpt-3.5-turbo
+mmap: true
+parameters:
+  model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf

+roles:
+  assistant_function_call: assistant
+  function: tool
+template:
+  chat_message: |
+    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}}
+    {{ if eq .RoleName "assistant_function_call" }}<tool_call>{{end}}
+    {{ if eq .RoleName "function" }}<tool_response>{{end}}
+    {{if .Content}}{{.Content}}{{end}}
+    {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
+    {{ if eq .RoleName "assistant_function_call" }}</tool_call>{{end}}
+    {{ if eq .RoleName "function" }}</tool_response>{{end}}
+    <|im_end|>
+  # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
+  function: |
+    <|im_start|>system
+    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+    <tools>
+    {{range .Functions}}
+    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+    {{end}}
+    </tools>
+    Use the following pydantic model json schema for each tool call you will make:
+    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
+    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+    <tool_call>
+    {'arguments': <args-dict>, 'name': <function-name>}
+    </tool_call><|im_end|>
+    {{.Input}}
+    <|im_start|>assistant
+
+  chat: |
+    {{.Input}}
+    <|im_start|>assistant
+  completion: |
+    {{.Input}}
+context_size: 4096
+f16: true
+stopwords:
+- <|im_end|>
+-
+usage: |
+  curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+    "model": "gpt-3.5-turbo",
+    "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
+  }'
diff --git a/aio/gpu-8g/vision.yaml b/aio/gpu-8g/vision.yaml
new file mode 100644
index 00000000..3d240681
--- /dev/null
+++ b/aio/gpu-8g/vision.yaml
@@ -0,0 +1,40 @@
+backend: llama-cpp
+context_size: 4096
+f16: true
+
+gpu_layers: 90
+mmap: true
+name: llava
+
+roles:
+  user: "USER:"
+  assistant: "ASSISTANT:"
+  system: "SYSTEM:"
+
+mmproj: bakllava-mmproj.gguf
+parameters:
+  model: bakllava.gguf
+  temperature: 0.2
+  top_k: 40
+  top_p: 0.95
+  seed: -1
+mirostat: 2
+mirostat_eta: 1.0
+mirostat_tau: 1.0
+
+template:
+  chat: |
+    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
+    {{.Input}}
+    ASSISTANT:
+
+download_files:
+- filename: bakllava.gguf
+  uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
+- filename: bakllava-mmproj.gguf
+  uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
+
+usage: |
+  curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+    "model": "llava",
+    "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index 3add0972..383a2b77 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -248,7 +248,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 			Role:         r,
 			RoleName:     role,
 			Content:      i.StringContent,
+			FunctionCall: i.FunctionCall,
 			FunctionName: i.Name,
+			LastMessage:  messageIndex == (len(input.Messages) - 1),
+			Function:     config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)),
 			MessageIndex: messageIndex,
 		}
 		templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml
new file mode 100644
index 00000000..84510d2a
--- /dev/null
+++ b/embedded/models/hermes-2-pro-mistral.yaml
@@ -0,0 +1,51 @@
+name: hermes-2-pro-mistral
+mmap: true
+parameters:
+  model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
+
+roles:
+  assistant_function_call: assistant
+  function: tool
+template:
+  chat_message: |
+    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}}
+    {{ if eq .RoleName "assistant_function_call" }}<tool_call>{{end}}
+    {{ if eq .RoleName "function" }}<tool_response>{{end}}
+    {{if .Content}}{{.Content}}{{end}}
+    {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
+    {{ if eq .RoleName "assistant_function_call" }}</tool_call>{{end}}
+    {{ if eq .RoleName "function" }}</tool_response>{{end}}
+    <|im_end|>
+  # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
+  function: |
+    <|im_start|>system
+    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+    <tools>
+    {{range .Functions}}
+    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+    {{end}}
+    </tools>
+    Use the following pydantic model json schema for each tool call you will make:
+    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
+    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+    <tool_call>
+    {'arguments': <args-dict>, 'name': <function-name>}
+    </tool_call><|im_end|>
+    {{.Input}}
+    <|im_start|>assistant
+
+  chat: |
+    {{.Input}}
+    <|im_start|>assistant
+  completion: |
+    {{.Input}}
+context_size: 4096
+f16: true
+stopwords:
+- <|im_end|>
+-
+usage: |
+  curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+    "model": "hermes-2-pro-mistral",
+    "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
+  }'
diff --git a/go.mod b/go.mod
index b218ca41..f2c53e84 100644
--- a/go.mod
+++ b/go.mod
@@ -53,6 +53,9 @@ require (
 )

 require (
+	github.com/Masterminds/goutils v1.1.1 // indirect
+	github.com/Masterminds/semver/v3 v3.2.0 // indirect
+	github.com/Masterminds/sprig/v3 v3.2.3 // indirect
 	github.com/alecthomas/chroma v0.10.0 // indirect
 	github.com/aymanbagabas/go-osc52 v1.0.3 // indirect
 	github.com/aymerick/douceur v0.2.0 // indirect
@@ -66,11 +69,14 @@ require (
 	github.com/golang/protobuf v1.5.3 // indirect
 	github.com/golang/snappy v0.0.2 // indirect
 	github.com/gorilla/css v1.0.0 // indirect
+	github.com/huandu/xstrings v1.3.3 // indirect
 	github.com/klauspost/pgzip v1.2.5 // indirect
 	github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
 	github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
 	github.com/microcosm-cc/bluemonday v1.0.26 // indirect
 	github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
+	github.com/mitchellh/copystructure v1.0.0 // indirect
+	github.com/mitchellh/reflectwalk v1.0.0 // indirect
 	github.com/muesli/reflow v0.3.0 // indirect
 	github.com/muesli/termenv v0.13.0 // indirect
 	github.com/nwaples/rardecode v1.1.0 // indirect
@@ -81,12 +87,15 @@ require (
 	github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect
 	github.com/prometheus/common v0.44.0 // indirect
 	github.com/prometheus/procfs v0.11.1 // indirect
+	github.com/shopspring/decimal v1.2.0 // indirect
+	github.com/spf13/cast v1.3.1 // indirect
 	github.com/ulikunitz/xz v0.5.9 // indirect
 	github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
 	github.com/yuin/goldmark v1.5.2 // indirect
github.com/yuin/goldmark-emoji v1.0.1 // indirect go.opentelemetry.io/otel/sdk v1.19.0 // indirect go.opentelemetry.io/otel/trace v1.19.0 // indirect + golang.org/x/crypto v0.14.0 // indirect golang.org/x/term v0.13.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect gopkg.in/fsnotify.v1 v1.4.7 // indirect diff --git a/go.sum b/go.sum index a3ecade2..7238ceba 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,11 @@ github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf h1:UgjXLcE9I+VaVz7uBIlzAnyZIXwiDlIiTWqCh159aUI= github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf/go.mod h1:UOf2Mb/deUri5agct5OJ4SLWjhI+kZKbsUVUeRb24I0= +github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= +github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= +github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g= +github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= +github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA= +github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM= github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek= github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s= github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= @@ -85,6 +91,7 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE= github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY= @@ -95,7 +102,10 @@ github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+l github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4= +github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw= @@ -136,6 +146,10 @@ github.com/microcosm-cc/bluemonday v1.0.26 h1:xbqSvqzQMeEHCqMi64VAs4d8uy6Mequs3r github.com/microcosm-cc/bluemonday v1.0.26/go.mod h1:JyzOCs9gkyQyjs+6h10UEVSe02CGwkhd72Xdqh78TWs= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db 
h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= +github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ= +github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw= +github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/IfikLNY= +github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 h1:OFVkSxR7CRSRSNm5dvpMRZwmSwWa8EMMnHbc84fW5tU= github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig= github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGwqBpN8N7BrSKC+nmdfw+9nPQIDyjHHlaIiitZI= @@ -210,9 +224,14 @@ github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFt github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ= github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU= github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k= +github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ= +github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= +github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng= +github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= @@ -245,6 +264,7 @@ github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMx github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yuin/goldmark v1.5.2 h1:ALmeCk/px5FSm1MAcFBAsVKZjDuMVj8Tm7FFIlMJnqU= github.com/yuin/goldmark v1.5.2/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yuin/goldmark-emoji v1.0.1 h1:ctuWEyzGBwiucEqxzwe0SOYDXPAucOrE9NQC18Wa1os= @@ -266,7 +286,12 @@ go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmY golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.3.0/go.mod 
h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= +golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc= +golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -274,14 +299,18 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.0.0-20221002022538-bcab6841153b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= +golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -297,6 +326,8 @@ golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -311,6 
+342,7 @@ golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= @@ -318,11 +350,13 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.12.0 h1:YW6HUoUmYBpwSgyaGaZq1fHjrBjX1rlpZ54T6mu2kss= golang.org/x/tools v0.12.0/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/pkg/model/loader.go b/pkg/model/loader.go index bea32fb7..c2c9df0e 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -10,6 +10,7 @@ import ( "sync" "text/template" + "github.com/Masterminds/sprig/v3" grammar "github.com/go-skynet/LocalAI/pkg/grammar" "github.com/go-skynet/LocalAI/pkg/grpc" process "github.com/mudler/go-processmanager" @@ -36,6 +37,9 @@ type ChatMessageTemplateData struct { FunctionName string Content string MessageIndex int + Function bool + FunctionCall interface{} + LastMessage bool } // Keep this in sync with config.TemplateConfig. Is there a more idiomatic way to accomplish this in go? 
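+// A rough note on the new fields (inferred from the chat.go hunk above): Function is true only
+// for the final message when a grammar is configured, LastMessage marks the final turn, and
+// FunctionCall carries the parsed tool-call payload that templates render via {{toJson .FunctionCall}}.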
@@ -261,7 +265,7 @@ func (ml *ModelLoader) loadTemplateIfExists(templateType TemplateType, templateN } // Parse the template - tmpl, err := template.New("prompt").Parse(dat) + tmpl, err := template.New("prompt").Funcs(sprig.FuncMap()).Parse(dat) if err != nil { return err } diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go index cc514334..979b4d83 100644 --- a/pkg/startup/model_preload.go +++ b/pkg/startup/model_preload.go @@ -60,7 +60,23 @@ func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, model } } default: - log.Warn().Msgf("[startup] failed resolving model '%s'", url) + if _, err := os.Stat(url); err == nil { + log.Debug().Msgf("[startup] resolved local model: %s", url) + // copy to modelPath + md5Name := utils.MD5(url) + + modelYAML, err := os.ReadFile(url) + if err != nil { + log.Error().Msgf("error loading model: %s", err.Error()) + continue + } + + if err := os.WriteFile(filepath.Join(modelPath, md5Name)+".yaml", modelYAML, os.ModePerm); err != nil { + log.Error().Msgf("error loading model: %s", err.Error()) + } + } else { + log.Warn().Msgf("[startup] failed resolving model '%s'", url) + } } } } From 3cf64d1e7e835224da0ad5a3df5dcf8f675722f4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 21 Mar 2024 08:57:41 +0100 Subject: [PATCH 0164/2895] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 082da33e..c58428f7 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) +- All-in-one image: https://github.com/mudler/LocalAI/issues/1855 - Parallel function calling: https://github.com/mudler/LocalAI/pull/1726 - Upload file API: https://github.com/mudler/LocalAI/pull/1703 - Tools API support: https://github.com/mudler/LocalAI/pull/1715 From 743095b7d82b20998b4dde1e557292c7fcc6bd82 Mon Sep 17 00:00:00 2001 From: Sebastian Date: Thu, 21 Mar 2024 22:08:33 +0100 Subject: [PATCH 0165/2895] docs(mac): improve documentation for mac build (#1873) * docs(mac): Improve documentation for mac build - added documentation to build from current master - added troubleshooting information Signed-off-by: Sebastian * docs(max): fix typo Signed-off-by: Sebastian --------- Signed-off-by: Sebastian --- docs/content/docs/getting-started/build.md | 27 +++++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/docs/content/docs/getting-started/build.md b/docs/content/docs/getting-started/build.md index b26a16d7..238bdbec 100644 --- a/docs/content/docs/getting-started/build.md +++ b/docs/content/docs/getting-started/build.md @@ -45,6 +45,8 @@ To install the dependencies follow the instructions below: {{< tabs tabTotal="3" >}} {{% tab tabName="Apple" %}} +Install `xcode` from the App Store + ```bash brew install abseil cmake go grpc protobuf wget ``` @@ -111,10 +113,12 @@ docker run --rm -ti -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS ### Example: Build on mac -Building on Mac (M1 or M2) works, but you may need to install some prerequisites using `brew`. +Building on Mac (M1, M2 or M3) works, but you may need to install some prerequisites using `brew`. The below has been tested by one mac user and found to work. 
Note that this doesn't use Docker to run the server:
+
+Install `xcode` from the App Store (needed for MetalKit)
+
 ```
 # install build dependencies
 brew install abseil cmake go grpc protobuf wget
@@ -146,8 +150,20 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
 }'
 ```

-### Build with Image generation support
+#### Troubleshooting mac

+If you encounter errors regarding a missing `metal` utility, install `Xcode` from the App Store.
+If completions are slow, ensure that `gpu-layers` in your model yaml matches the number of layers from the model in use (or simply use a high number such as 256).
+If you get a compile error: `error: only virtual member functions can be marked 'final'`, reinstall all the necessary brew packages, clean the build, and try again.
+
+```
+# reinstall build dependencies
+brew reinstall abseil cmake go grpc protobuf wget
+
+make clean
+
+make build
+```

 **Requirements**: OpenCV, Gomp

@@ -239,13 +255,12 @@ make BUILD_TYPE=sycl_f32 build # for float32

 #### Metal (Apple Silicon)

 ```
-make BUILD_TYPE=metal build
+make build

-# Set `gpu_layers: 1` to your YAML model config file and `f16: true`
-# Note: only models quantized with q4_0 are supported!
+# correct build type is automatically used on mac (BUILD_TYPE=metal)
+# Set `gpu_layers: 256` (or equal to the number of model layers) to your YAML model config file and `f16: true`
 ```
-
 ### Windows compatibility

 Make sure to give enough resources to the running container. See https://github.com/go-skynet/LocalAI/issues/2

From abc9360dc62863af1c484f914cf2b0948169fb02 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 21 Mar 2024 22:09:04 +0100
Subject: [PATCH 0166/2895] feat(aio): entrypoint, update workflows (#1872)

---
 .github/workflows/image.yml       |  5 ++
 .github/workflows/image_build.yml | 68 ++++++++++++++++++++-
 Dockerfile.aio                    |  7 +--
 Makefile                          |  5 +-
 aio/cpu/README.md                 |  5 ++
 aio/cpu/embeddings.yaml           | 13 ++--
 aio/entrypoint.sh                 | 98 +++++++++++++++++++++++++++++++
 aio/gpu-8g/embeddings.yaml        |  4 +-
 aio/gpu-8g/image-gen.yaml         |  2 +-
 9 files changed, 191 insertions(+), 16 deletions(-)
 create mode 100644 aio/cpu/README.md
 create mode 100755 aio/entrypoint.sh

diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 5ba0f1bf..8e2bbbdd 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -26,6 +26,7 @@ jobs:
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
       base-image: ${{ matrix.base-image }}
+      aio: ${{ matrix.aio }}
       makeflags: "-j3"
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
@@ -86,6 +87,7 @@ jobs:
           image-type: 'extras'
           runs-on: 'arc-runner-set'
           base-image: "ubuntu:22.04"
+          aio: "-aio-gpu-nvidia-cuda-11"
         - build-type: 'cublas'
           cuda-major-version: "12"
           cuda-minor-version: "1"
@@ -96,6 +98,7 @@ jobs:
           image-type: 'extras'
           runs-on: 'arc-runner-set'
           base-image: "ubuntu:22.04"
+          aio: "-aio-gpu-nvidia-cuda-12"
         - build-type: ''
           #platforms: 'linux/amd64,linux/arm64'
           platforms: 'linux/amd64'
@@ -199,6 +202,7 @@ jobs:
       cuda-minor-version: ${{ matrix.cuda-minor-version }}
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
+      aio: ${{ matrix.aio }}
       base-image: ${{ matrix.base-image }}
       makeflags: "-j3"
     secrets:
@@ -217,6 +221,7 @@ jobs:
           image-type: 'core'
           base-image: "ubuntu:22.04"
           runs-on: 'ubuntu-latest'
+          aio: "-aio-cpu"
         - build-type: 'cublas'
           cuda-major-version: "11"
           cuda-minor-version: "7"
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index a978f1bf..22f72131 100644
---
a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -51,6 +51,11 @@ on: required: false default: '' type: string + aio: + description: 'AIO Image Name' + required: false + default: '' + type: string secrets: dockerUsername: required: true @@ -129,7 +134,30 @@ jobs: flavor: | latest=${{ inputs.tag-latest }} suffix=${{ inputs.tag-suffix }} - + - name: Docker meta AIO (quay.io) + if: inputs.aio != '' + id: meta_aio + uses: docker/metadata-action@v5 + with: + images: | + quay.io/go-skynet/local-ai + tags: | + type=ref,event=branch + type=semver,pattern={{raw}} + flavor: | + suffix=${{ inputs.aio }} + - name: Docker meta AIO (dockerhub) + if: inputs.aio != '' + id: meta_aio_dockerhub + uses: docker/metadata-action@v5 + with: + images: | + localai/localai + tags: | + type=ref,event=branch + type=semver,pattern={{raw}} + flavor: | + suffix=${{ inputs.aio }} - name: Set up QEMU uses: docker/setup-qemu-action@master with: @@ -172,6 +200,44 @@ jobs: push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} + - + name: Inspect image + if: github.event_name != 'pull_request' + run: | + docker pull localai/localai:${{ steps.meta.outputs.version }} + docker image inspect localai/localai:${{ steps.meta.outputs.version }} + docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} + docker image inspect quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} + - name: Build and push AIO image + if: inputs.aio != '' + uses: docker/build-push-action@v5 + with: + builder: ${{ steps.buildx.outputs.name }} + build-args: | + BASE_IMAGE=quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} + context: . + file: ./Dockerfile.aio + platforms: ${{ inputs.platforms }} + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta_aio.outputs.tags }} + labels: ${{ steps.meta_aio.outputs.labels }} + - name: Build and push AIO image (dockerhub) + if: inputs.aio != '' + uses: docker/build-push-action@v5 + with: + builder: ${{ steps.buildx.outputs.name }} + build-args: | + BASE_IMAGE=localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} + context: . 
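+          # BASE_IMAGE above is pinned to the core tag produced earlier in this job, so the AIO
+          # layer (model presets plus entrypoint, per Dockerfile.aio) tracks the exact core image.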
+ file: ./Dockerfile.aio + platforms: ${{ inputs.platforms }} + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta_aio_dockerhub.outputs.tags }} + labels: ${{ steps.meta_aio_dockerhub.outputs.labels }} - name: job summary run: | echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY + - name: job summary(AIO) + if: inputs.aio != '' + run: | + echo "Built image: ${{ steps.meta_aio.outputs.labels }}" >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/Dockerfile.aio b/Dockerfile.aio index 4097e6d5..81063bb4 100644 --- a/Dockerfile.aio +++ b/Dockerfile.aio @@ -1,9 +1,8 @@ ARG BASE_IMAGE=ubuntu:22.04 FROM ${BASE_IMAGE} -ARG SIZE=cpu -ENV MODELS="/aio-models/embeddings.yaml,/aio-models/text-to-speech.yaml,/aio-models/image-gen.yaml,/aio-models/text-to-text.yaml,/aio-models/speech-to-text.yaml,/aio-models/vision.yaml" -COPY aio/${SIZE} /aio-models +RUN apt-get update && apt-get install -y pciutils && apt-get clean -ENTRYPOINT [ "/build/entrypoint.sh" ] \ No newline at end of file +COPY aio/ /aio +ENTRYPOINT [ "/aio/entrypoint.sh" ] \ No newline at end of file diff --git a/Makefile b/Makefile index c03091d0..96347307 100644 --- a/Makefile +++ b/Makefile @@ -536,7 +536,6 @@ grpcs: prepare $(GRPC_BACKENDS) DOCKER_IMAGE?=local-ai DOCKER_AIO_IMAGE?=local-ai-aio -DOCKER_AIO_SIZE?=cpu IMAGE_TYPE?=core BASE_IMAGE?=ubuntu:22.04 @@ -549,11 +548,9 @@ docker: -t $(DOCKER_IMAGE) . docker-aio: - @echo "Building AIO image with size $(DOCKER_AIO_SIZE)" - @echo "Building AIO image with base image $(BASE_IMAGE)" + @echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)" docker build \ --build-arg BASE_IMAGE=$(BASE_IMAGE) \ - --build-arg SIZE=$(DOCKER_AIO_SIZE) \ -t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio . docker-aio-all: diff --git a/aio/cpu/README.md b/aio/cpu/README.md new file mode 100644 index 00000000..8b0b1086 --- /dev/null +++ b/aio/cpu/README.md @@ -0,0 +1,5 @@ +## AIO CPU size + +Use this image with CPU-only. + +Please keep using only C++ backends so the base image is as small as possible (without CUDA, cuDNN, python, etc). \ No newline at end of file diff --git a/aio/cpu/embeddings.yaml b/aio/cpu/embeddings.yaml index 512d63a4..bdee079c 100644 --- a/aio/cpu/embeddings.yaml +++ b/aio/cpu/embeddings.yaml @@ -1,13 +1,18 @@ -name: all-minilm-l6-v2 -backend: sentencetransformers +backend: bert-embeddings embeddings: true +f16: true + +gpu_layers: 90 +mmap: true +name: text-embedding-ada-002 + parameters: - model: all-MiniLM-L6-v2 + model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin usage: | You can test this model with curl like this: curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{ "input": "Your text string goes here", - "model": "all-minilm-l6-v2" + "model": "text-embedding-ada-002" }' \ No newline at end of file diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh new file mode 100755 index 00000000..8c15a5e4 --- /dev/null +++ b/aio/entrypoint.sh @@ -0,0 +1,98 @@ +#!/bin/bash + +echo "===> LocalAI All-in-One (AIO) container starting..." + +GPU_ACCELERATION=false +GPU_VENDOR="" + +function detect_gpu() { + case "$(uname -s)" in + Linux) + if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then + echo "NVIDIA GPU detected" + # nvidia-smi should be installed in the container + if nvidia-smi; then + GPU_ACCELERATION=true + GPU_VENDOR=nvidia + else + echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available." 
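+                # Note (assumption, not verified here): nvidia-smi also fails when the container
+                # was started without GPU passthrough, e.g. missing `docker run --gpus all ...`,
+                # not only when the tool itself is absent.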
+            fi
+        elif lspci | grep -E 'VGA|3D' | grep -iq amd; then
+            echo "AMD GPU detected"
+            # Check if ROCm is installed
+            if [ -d /opt/rocm ]; then
+                GPU_ACCELERATION=true
+                GPU_VENDOR=amd
+            else
+                echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
+            fi
+        elif lspci | grep -E 'VGA|3D' | grep -iq intel; then
+            echo "Intel GPU detected"
+            if [ -d /opt/intel ]; then
+                GPU_ACCELERATION=true
+            else
+                echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
+            fi
+        fi
+        ;;
+    Darwin)
+        if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then
+            echo "Apple Metal supported GPU detected"
+            GPU_ACCELERATION=true
+            GPU_VENDOR=apple
+        fi
+        ;;
+  esac
+}
+
+function detect_gpu_size() {
+  if [ "$GPU_ACCELERATION" = true ]; then
+    GPU_SIZE=gpu-8g
+  fi
+
+  # Attempting to find GPU memory size for NVIDIA GPUs
+  if echo "$GPU_VENDOR" | grep -iq nvidia; then
+    echo "NVIDIA GPU detected. Attempting to find memory size..."
+    nvidia_sm=($(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits))
+    if [ ! -z "$nvidia_sm" ]; then
+      echo "Total GPU Memory: ${nvidia_sm[0]} MiB"
+    else
+      echo "Unable to determine NVIDIA GPU memory size."
+    fi
+    # if bigger than 8GB, use 16GB
+    #if [ "$nvidia_sm" -gt 8192 ]; then
+    #  GPU_SIZE=gpu-16g
+    #fi
+  else
+    echo "Non-NVIDIA GPU detected. GPU memory size detection for non-NVIDIA GPUs is not supported in this script."
+  fi
+
+  # default to cpu if GPU_SIZE is not set
+  if [ -z "$GPU_SIZE" ]; then
+    GPU_SIZE=cpu
+  fi
+}
+
+function check_vars() {
+  if [ -z "$MODELS" ]; then
+    echo "MODELS environment variable is not set. Please set it to a comma-separated list of model YAML files to load."
+    exit 1
+  fi
+
+  if [ -z "$SIZE" ]; then
+    echo "SIZE environment variable is not set.
Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple" + exit 1 + fi +} + +detect_gpu +detect_gpu_size + +SIZE=${SIZE:-$GPU_SIZE} # default to cpu +MODELS=${MODELS:-/aio/${SIZE}/embeddings.yaml,/aio/${SIZE}/text-to-speech.yaml,/aio/${SIZE}/image-gen.yaml,/aio/${SIZE}/text-to-text.yaml,/aio/${SIZE}/speech-to-text.yaml,/aio/${SIZE}/vision.yaml} + +check_vars + +echo "Starting LocalAI with the following models: $MODELS" + +/build/entrypoint.sh "$@" \ No newline at end of file diff --git a/aio/gpu-8g/embeddings.yaml b/aio/gpu-8g/embeddings.yaml index 512d63a4..98b519d5 100644 --- a/aio/gpu-8g/embeddings.yaml +++ b/aio/gpu-8g/embeddings.yaml @@ -1,4 +1,4 @@ -name: all-minilm-l6-v2 +name: text-embedding-ada-002 backend: sentencetransformers embeddings: true parameters: @@ -9,5 +9,5 @@ usage: | curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{ "input": "Your text string goes here", - "model": "all-minilm-l6-v2" + "model": "text-embedding-ada-002" }' \ No newline at end of file diff --git a/aio/gpu-8g/image-gen.yaml b/aio/gpu-8g/image-gen.yaml index 3857cd6b..74cefc1d 100644 --- a/aio/gpu-8g/image-gen.yaml +++ b/aio/gpu-8g/image-gen.yaml @@ -4,7 +4,7 @@ parameters: backend: diffusers step: 25 f16: true -cuda: true + diffusers: pipeline_type: StableDiffusionPipeline cuda: true From 418ba020259fe954333ae810bfaa13dc813b0bb1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 22 Mar 2024 09:14:17 +0100 Subject: [PATCH 0167/2895] ci: fix typo Signed-off-by: Ettore Di Giacinto --- .github/workflows/image_build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 22f72131..8f1386c6 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -227,7 +227,7 @@ jobs: with: builder: ${{ steps.buildx.outputs.name }} build-args: | - BASE_IMAGE=localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} + BASE_IMAGE=localai/localai:${{ steps.meta.outputs.version }} context: . 
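          # The meta_aio_dockerhub version carries the AIO suffix, so it never exists as a plain
          # core tag; the base image must come from the unsuffixed steps.meta version, hence the fix above.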
file: ./Dockerfile.aio platforms: ${{ inputs.platforms }} @@ -240,4 +240,4 @@ jobs: - name: job summary(AIO) if: inputs.aio != '' run: | - echo "Built image: ${{ steps.meta_aio.outputs.labels }}" >> $GITHUB_STEP_SUMMARY \ No newline at end of file + echo "Built image: ${{ steps.meta_aio.outputs.labels }}" >> $GITHUB_STEP_SUMMARY From 07468c8786e8e4ad16d275b48367880e2326df16 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 22 Mar 2024 09:14:42 +0100 Subject: [PATCH 0168/2895] :arrow_up: Update ggerganov/llama.cpp (#1874) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 96347307..1509ed93 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=1c51f98adcbad40e3c41f0a6ffadeb723190b417 +CPPLLAMA_VERSION?=d0a71233fbf8ade8ef06ad8e6b81d1d7b254895f # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From dd84c29a3dbb367e9e98d39374fc1263ce11cbff Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 22 Mar 2024 09:14:56 +0100 Subject: [PATCH 0169/2895] :arrow_up: Update ggerganov/whisper.cpp (#1875) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1509ed93..8aedf897 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=79d5765e7e1a904d976adfd5636da7da43163eb3 +WHISPER_CPP_VERSION?=fff24a0148fe194df4997a738eeceddd724959c3 # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From 600152df2344d8ae04d3782534c5312ee535f43c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 22 Mar 2024 20:55:11 +0100 Subject: [PATCH 0170/2895] fix(config): pass by config options, respect defaults (#1878) This bug had the unpleasant effect that it ignored defaults passed by the CLI. For instance threads could be changed only via model config file. --- core/config/application_config.go | 15 +++++++++++++++ core/config/backend_config.go | 25 ++++++++++++++----------- core/startup/startup.go | 6 ++++-- 3 files changed, 33 insertions(+), 13 deletions(-) diff --git a/core/config/application_config.go b/core/config/application_config.go index f25b4348..03242c3c 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -258,6 +258,21 @@ func WithApiKeys(apiKeys []string) AppOption { } } +// ToConfigLoaderOptions returns a slice of ConfigLoader Option. +// Some options defined at the application level are going to be passed as defaults for +// all the configuration for the models. +// This includes for instance the context size or the number of threads. +// If a model doesn't set configs directly to the config model file +// it will use the defaults defined here. 
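+// For example (flag names assumed): running with `--threads 4` yields LoadOptionThreads(4)
+// below, while a model YAML that sets threads explicitly still takes precedence.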
+func (o *ApplicationConfig) ToConfigLoaderOptions() []ConfigLoaderOption { + return []ConfigLoaderOption{ + LoadOptionContextSize(o.ContextSize), + LoadOptionDebug(o.Debug), + LoadOptionF16(o.F16), + LoadOptionThreads(o.Threads), + } +} + // func WithMetrics(meter *metrics.Metrics) AppOption { // return func(o *StartupOptions) { // o.Metrics = meter diff --git a/core/config/backend_config.go b/core/config/backend_config.go index daaf0257..32e10a17 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -188,7 +188,14 @@ func (c *BackendConfig) FunctionToCall() string { return c.functionCallNameString } -func (cfg *BackendConfig) SetDefaults(debug bool, threads, ctx int, f16 bool) { +func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { + lo := &LoadOptions{} + lo.Apply(opts...) + + ctx := lo.ctxSize + threads := lo.threads + f16 := lo.f16 + debug := lo.debug defaultTopP := 0.7 defaultTopK := 80 defaultTemp := 0.9 @@ -333,9 +340,6 @@ func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) { // Load a config file for a model func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) { - lo := &LoadOptions{} - lo.Apply(opts...) - // Load a config file if present after the model name cfg := &BackendConfig{ PredictionOptions: schema.PredictionOptions{ @@ -350,7 +354,9 @@ func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath // Try loading a model config file modelConfig := filepath.Join(modelPath, modelName+".yaml") if _, err := os.Stat(modelConfig); err == nil { - if err := cl.LoadBackendConfig(modelConfig); err != nil { + if err := cl.LoadBackendConfig( + modelConfig, opts..., + ); err != nil { return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error()) } cfgExisting, exists = cl.GetBackendConfig(modelName) @@ -360,7 +366,7 @@ func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath } } - cfg.SetDefaults(lo.debug, lo.threads, lo.ctxSize, lo.f16) + cfg.SetDefaults(opts...) return cfg, nil } @@ -371,9 +377,6 @@ func NewBackendConfigLoader() *BackendConfigLoader { } } func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) { - lo := &LoadOptions{} - lo.Apply(opts...) - c := &[]*BackendConfig{} f, err := os.ReadFile(file) if err != nil { @@ -384,7 +387,7 @@ func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendC } for _, cc := range *c { - cc.SetDefaults(lo.debug, lo.threads, lo.ctxSize, lo.f16) + cc.SetDefaults(opts...) } return *c, nil @@ -403,7 +406,7 @@ func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, return nil, fmt.Errorf("cannot unmarshal config file: %w", err) } - c.SetDefaults(lo.debug, lo.threads, lo.ctxSize, lo.f16) + c.SetDefaults(opts...) 
return c, nil } diff --git a/core/startup/startup.go b/core/startup/startup.go index 43e6646d..828eb7a7 100644 --- a/core/startup/startup.go +++ b/core/startup/startup.go @@ -58,12 +58,14 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode cl := config.NewBackendConfigLoader() ml := model.NewModelLoader(options.ModelPath) - if err := cl.LoadBackendConfigsFromPath(options.ModelPath); err != nil { + configLoaderOpts := options.ToConfigLoaderOptions() + + if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil { log.Error().Msgf("error loading config files: %s", err.Error()) } if options.ConfigFile != "" { - if err := cl.LoadBackendConfigFile(options.ConfigFile); err != nil { + if err := cl.LoadBackendConfigFile(options.ConfigFile, configLoaderOpts...); err != nil { log.Error().Msgf("error loading config file: %s", err.Error()) } } From 3bec467a91071133f8f74e7ce04d997733ed51b9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 22 Mar 2024 21:12:48 +0100 Subject: [PATCH 0171/2895] feat(models): add phi-2-chat, llava-1.6, bakllava, cerbero (#1879) --- embedded/models/bakllava.yaml | 40 ++++++++++++++++++++++++++ embedded/models/cerbero.yaml | 24 ++++++++++++++++ embedded/models/llava-1.5.yaml | 33 +++++++++++++++++++++ embedded/models/llava-1.6-mistral.yaml | 33 +++++++++++++++++++++ embedded/models/llava-1.6-vicuna.yaml | 37 ++++++++++++++++++++++++ embedded/models/phi-2-chat.yaml | 25 ++++++++++++++++ 6 files changed, 192 insertions(+) create mode 100644 embedded/models/bakllava.yaml create mode 100644 embedded/models/cerbero.yaml create mode 100644 embedded/models/llava-1.5.yaml create mode 100644 embedded/models/llava-1.6-mistral.yaml create mode 100644 embedded/models/llava-1.6-vicuna.yaml create mode 100644 embedded/models/phi-2-chat.yaml diff --git a/embedded/models/bakllava.yaml b/embedded/models/bakllava.yaml new file mode 100644 index 00000000..52fd9466 --- /dev/null +++ b/embedded/models/bakllava.yaml @@ -0,0 +1,40 @@ +backend: llama-cpp +context_size: 4096 +f16: true + +gpu_layers: 90 +mmap: true +name: bakllava + +roles: + user: "USER:" + assistant: "ASSISTANT:" + system: "SYSTEM:" + +mmproj: bakllava-mmproj.gguf +parameters: + model: bakllava.gguf + temperature: 0.2 + top_k: 40 + top_p: 0.95 + seed: -1 +mirostat: 2 +mirostat_eta: 1.0 +mirostat_tau: 1.0 + +template: + chat: | + A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. 
+ {{.Input}} + ASSISTANT: + +download_files: +- filename: bakllava.gguf + uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf +- filename: bakllava-mmproj.gguf + uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf + +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "bakllava", + "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' diff --git a/embedded/models/cerbero.yaml b/embedded/models/cerbero.yaml new file mode 100644 index 00000000..8ace4e35 --- /dev/null +++ b/embedded/models/cerbero.yaml @@ -0,0 +1,24 @@ +backend: llama +context_size: 8192 +f16: false +gpu_layers: 90 +name: cerbero +mmap: false +parameters: + model: huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q8_0.gguf + top_k: 80 + temperature: 0.2 + top_p: 0.7 +template: + completion: "{{.Input}}" + chat: "Questa è una conversazione tra un umano ed un assistente AI.\n{{.Input}}\n[|Assistente|] " +roles: + user: "[|Umano|] " + system: "[|Umano|] " + assistant: "[|Assistente|] " + +stopwords: +- "[|Umano|]" + +trimsuffix: +- "\n" \ No newline at end of file diff --git a/embedded/models/llava-1.5.yaml b/embedded/models/llava-1.5.yaml new file mode 100644 index 00000000..3db48524 --- /dev/null +++ b/embedded/models/llava-1.5.yaml @@ -0,0 +1,33 @@ +backend: llama-cpp +context_size: 4096 +f16: true + +gpu_layers: 90 +mmap: true +name: llava-1.5 + +roles: + user: "USER:" + assistant: "ASSISTANT:" + system: "SYSTEM:" + +mmproj: llava-v1.5-7b-mmproj-Q8_0.gguf +parameters: + model: llava-v1.5-7b-Q4_K.gguf + +template: + chat: | + A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. + {{.Input}} + ASSISTANT: + +download_files: +- filename: llava-v1.5-7b-Q4_K.gguf + uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-Q4_K.gguf +- filename: llava-v1.5-7b-mmproj-Q8_0.gguf + uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf + +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "llava-1.5", + "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' diff --git a/embedded/models/llava-1.6-mistral.yaml b/embedded/models/llava-1.6-mistral.yaml new file mode 100644 index 00000000..602ceb62 --- /dev/null +++ b/embedded/models/llava-1.6-mistral.yaml @@ -0,0 +1,33 @@ +backend: llama-cpp +context_size: 4096 +f16: true + +gpu_layers: 90 +mmap: true +name: llava-1.6-mistral + +roles: + user: "USER:" + assistant: "ASSISTANT:" + system: "SYSTEM:" + +mmproj: llava-v1.6-7b-mmproj-f16.gguf +parameters: + model: llava-v1.6-mistral-7b.gguf + +template: + chat: | + A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. 
+ {{.Input}} + ASSISTANT: + +download_files: +- filename: llava-v1.6-mistral-7b.gguf + uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q6_K.gguf +- filename: llava-v1.6-7b-mmproj-f16.gguf + uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf + +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "llava-1.6-mistral", + "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' diff --git a/embedded/models/llava-1.6-vicuna.yaml b/embedded/models/llava-1.6-vicuna.yaml new file mode 100644 index 00000000..cea33e7f --- /dev/null +++ b/embedded/models/llava-1.6-vicuna.yaml @@ -0,0 +1,37 @@ +backend: llama-cpp +context_size: 4096 +f16: true + +gpu_layers: 90 +mmap: true +name: llava-1.6-vicuna + +roles: + user: "USER:" + assistant: "ASSISTANT:" + system: "SYSTEM:" + +mmproj: mmproj-vicuna7b-f16.gguf +parameters: + model: vicuna-7b-q5_k.gguf + temperature: 0.2 + top_k: 40 + top_p: 0.95 + seed: -1 + +template: + chat: | + A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. + {{.Input}} + ASSISTANT: + +download_files: +- filename: vicuna-7b-q5_k.gguf + uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf +- filename: mmproj-vicuna7b-f16.gguf + uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf + +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "llava-1.6-vicuna", + "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' diff --git a/embedded/models/phi-2-chat.yaml b/embedded/models/phi-2-chat.yaml new file mode 100644 index 00000000..4a3ca7aa --- /dev/null +++ b/embedded/models/phi-2-chat.yaml @@ -0,0 +1,25 @@ +name: phi-2-chat +mmap: true +parameters: + model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf + +template: + chat_message: | + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} + {{if .Content}}{{.Content}}{{end}} + <|im_end|> + chat: | + {{.Input}} + <|im_start|>assistant + completion: | + {{.Input}} +context_size: 4096 +f16: true +stopwords: +- <|im_end|> +- +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "phi-2-chat", + "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] + }' From 4b1ee0c1701a125125a78ecde892ec7b5e903fbd Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 22 Mar 2024 21:13:11 +0100 Subject: [PATCH 0172/2895] feat(aio): add tests, update model definitions (#1880) --- .github/workflows/test.yml | 52 +++++++++++ Makefile | 4 + aio/cpu/speech-to-text.yaml | 4 +- aio/cpu/text-to-speech.yaml | 2 +- aio/cpu/text-to-text.yaml | 31 ++++--- aio/cpu/vision.yaml | 4 +- aio/entrypoint.sh | 4 +- 
aio/gpu-8g/image-gen.yaml | 2 +- aio/gpu-8g/speech-to-text.yaml | 4 +- aio/gpu-8g/text-to-speech.yaml | 4 +- aio/gpu-8g/text-to-text.yaml | 4 +- aio/gpu-8g/vision.yaml | 19 ++-- go.mod | 25 +++++- go.sum | 93 +++++++++++++++++++ tests/e2e-aio/e2e_suite_test.go | 97 ++++++++++++++++++++ tests/e2e-aio/e2e_test.go | 152 ++++++++++++++++++++++++++++++++ 16 files changed, 461 insertions(+), 40 deletions(-) create mode 100644 tests/e2e-aio/e2e_suite_test.go create mode 100644 tests/e2e-aio/e2e_test.go diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8222508a..6d837821 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -110,6 +110,58 @@ jobs: uses: mxschmitt/action-tmate@v3 timeout-minutes: 5 + tests-aio-container: + runs-on: ubuntu-latest + steps: + - name: Release space from worker + run: | + echo "Listing top largest packages" + pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) + head -n 30 <<< "${pkgs}" + echo + df -h + echo + sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true + sudo apt-get remove --auto-remove android-sdk-platform-tools || true + sudo apt-get purge --auto-remove android-sdk-platform-tools || true + sudo rm -rf /usr/local/lib/android + sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true + sudo rm -rf /usr/share/dotnet + sudo apt-get remove -y '^mono-.*' || true + sudo apt-get remove -y '^ghc-.*' || true + sudo apt-get remove -y '.*jdk.*|.*jre.*' || true + sudo apt-get remove -y 'php.*' || true + sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true + sudo apt-get remove -y '^google-.*' || true + sudo apt-get remove -y azure-cli || true + sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true + sudo apt-get remove -y '^gfortran-.*' || true + sudo apt-get autoremove -y + sudo apt-get clean + echo + echo "Listing top largest packages" + pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) + head -n 30 <<< "${pkgs}" + echo + sudo rm -rfv build || true + df -h + - name: Clone + uses: actions/checkout@v4 + with: + submodules: true + - name: Build images + run: | + docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=core -t local-ai:tests -f Dockerfile . 
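+          # The core image just built seeds the AIO build: the next step
+          # appears to hand it to the docker-aio Make target as BASE_IMAGE and
+          # tags the result local-ai-aio:test for the e2e run that follows.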
+ BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio + - name: Test + run: | + LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \ + make run-e2e-aio + - name: Setup tmate session if tests fail + if: ${{ failure() }} + uses: mxschmitt/action-tmate@v3 + timeout-minutes: 5 + tests-apple: runs-on: macOS-14 strategy: diff --git a/Makefile b/Makefile index 8aedf897..95af1936 100644 --- a/Makefile +++ b/Makefile @@ -353,6 +353,10 @@ run-e2e-image: ls -liah $(abspath ./tests/e2e-fixtures) docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests +run-e2e-aio: + @echo 'Running e2e AIO tests' + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio + test-e2e: @echo 'Running e2e tests' BUILD_TYPE=$(BUILD_TYPE) \ diff --git a/aio/cpu/speech-to-text.yaml b/aio/cpu/speech-to-text.yaml index f7ebd217..77850d79 100644 --- a/aio/cpu/speech-to-text.yaml +++ b/aio/cpu/speech-to-text.yaml @@ -1,4 +1,4 @@ -name: whisper +name: whisper-1 backend: whisper parameters: model: ggml-whisper-base.bin @@ -10,7 +10,7 @@ usage: | ## Send the example audio file to the transcriptions endpoint curl http://localhost:8080/v1/audio/transcriptions \ -H "Content-Type: multipart/form-data" \ - -F file="@$PWD/gb1.ogg" -F model="whisper" + -F file="@$PWD/gb1.ogg" -F model="whisper-1" download_files: - filename: "ggml-whisper-base.bin" diff --git a/aio/cpu/text-to-speech.yaml b/aio/cpu/text-to-speech.yaml index 93c11403..91998e6a 100644 --- a/aio/cpu/text-to-speech.yaml +++ b/aio/cpu/text-to-speech.yaml @@ -1,4 +1,4 @@ -name: voice-en-us-amy-low +name: tts-1 download_files: - filename: voice-en-us-amy-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml index 7558ba9f..4fd88500 100644 --- a/aio/cpu/text-to-text.yaml +++ b/aio/cpu/text-to-text.yaml @@ -1,22 +1,25 @@ -name: gpt-3.5-turbo -context_size: 2048 -f16: true -gpu_layers: 90 +name: gpt-4 mmap: true -trimsuffix: -- "\n" parameters: - model: huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf + model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf template: - chat: &template |- - Instruct: {{.Input}} - Output: - completion: *template - + chat_message: | + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} + {{if .Content}}{{.Content}}{{end}} + <|im_end|> + chat: | + {{.Input}} + <|im_start|>assistant + completion: | + {{.Input}} +context_size: 2048 +f16: true +stopwords: +- <|im_end|> +- usage: | - To use this model, interact with the API (in another terminal) with curl for instance: curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "phi-2", + "model": "phi-2-chat", "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] }' diff --git a/aio/cpu/vision.yaml b/aio/cpu/vision.yaml index 3d240681..0777f715 100644 --- a/aio/cpu/vision.yaml +++ b/aio/cpu/vision.yaml @@ -4,7 +4,7 @@ f16: true gpu_layers: 90 mmap: true -name: llava +name: gpt-4-vision-preview roles: user: "USER:" @@ -36,5 +36,5 @@ download_files: usage: | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "llava", + "model": "gpt-4-vision-preview", "messages": [{"role": "user", 
"content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh index 8c15a5e4..b2f64f63 100755 --- a/aio/entrypoint.sh +++ b/aio/entrypoint.sh @@ -88,8 +88,8 @@ function check_vars() { detect_gpu detect_gpu_size -SIZE=${SIZE:-$GPU_SIZE} # default to cpu -MODELS=${MODELS:-/aio/${SIZE}/embeddings.yaml,/aio/${SIZE}/text-to-speech.yaml,/aio/${SIZE}/image-gen.yaml,/aio/${SIZE}/text-to-text.yaml,/aio/${SIZE}/speech-to-text.yaml,/aio/${SIZE}/vision.yaml} +SIZE="${SIZE:-$GPU_SIZE}" # default to cpu +export MODELS="${MODELS:-/aio/${SIZE}/embeddings.yaml,/aio/${SIZE}/text-to-speech.yaml,/aio/${SIZE}/image-gen.yaml,/aio/${SIZE}/text-to-text.yaml,/aio/${SIZE}/speech-to-text.yaml,/aio/${SIZE}/vision.yaml}" check_vars diff --git a/aio/gpu-8g/image-gen.yaml b/aio/gpu-8g/image-gen.yaml index 74cefc1d..9868572f 100644 --- a/aio/gpu-8g/image-gen.yaml +++ b/aio/gpu-8g/image-gen.yaml @@ -1,4 +1,4 @@ -name: dreamshaper +name: stablediffusion parameters: model: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors backend: diffusers diff --git a/aio/gpu-8g/speech-to-text.yaml b/aio/gpu-8g/speech-to-text.yaml index f7ebd217..77850d79 100644 --- a/aio/gpu-8g/speech-to-text.yaml +++ b/aio/gpu-8g/speech-to-text.yaml @@ -1,4 +1,4 @@ -name: whisper +name: whisper-1 backend: whisper parameters: model: ggml-whisper-base.bin @@ -10,7 +10,7 @@ usage: | ## Send the example audio file to the transcriptions endpoint curl http://localhost:8080/v1/audio/transcriptions \ -H "Content-Type: multipart/form-data" \ - -F file="@$PWD/gb1.ogg" -F model="whisper" + -F file="@$PWD/gb1.ogg" -F model="whisper-1" download_files: - filename: "ggml-whisper-base.bin" diff --git a/aio/gpu-8g/text-to-speech.yaml b/aio/gpu-8g/text-to-speech.yaml index 93c11403..8d875a29 100644 --- a/aio/gpu-8g/text-to-speech.yaml +++ b/aio/gpu-8g/text-to-speech.yaml @@ -1,4 +1,4 @@ -name: voice-en-us-amy-low +name: tts-1 download_files: - filename: voice-en-us-amy-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz @@ -10,6 +10,6 @@ usage: | To test if this model works as expected, you can use the following curl command: curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ - "model":"voice-en-us-amy-low", + "model":"tts-1", "input": "Hi, this is a test." 
}' \ No newline at end of file diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml index d91e057c..c6f26c07 100644 --- a/aio/gpu-8g/text-to-text.yaml +++ b/aio/gpu-8g/text-to-text.yaml @@ -1,4 +1,4 @@ -name: gpt-3.5-turbo +name: gpt-4 mmap: true parameters: model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf @@ -46,6 +46,6 @@ stopwords: - usage: | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "hermes-2-pro-mistral", + "model": "gpt-4", "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] }' diff --git a/aio/gpu-8g/vision.yaml b/aio/gpu-8g/vision.yaml index 3d240681..02542503 100644 --- a/aio/gpu-8g/vision.yaml +++ b/aio/gpu-8g/vision.yaml @@ -4,23 +4,20 @@ f16: true gpu_layers: 90 mmap: true -name: llava +name: gpt-4-vision-preview roles: user: "USER:" assistant: "ASSISTANT:" system: "SYSTEM:" -mmproj: bakllava-mmproj.gguf +mmproj: llava-v1.6-7b-mmproj-f16.gguf parameters: - model: bakllava.gguf + model: llava-v1.6-mistral-7b.Q5_K_M.gguf temperature: 0.2 top_k: 40 top_p: 0.95 seed: -1 -mirostat: 2 -mirostat_eta: 1.0 -mirostat_tau: 1.0 template: chat: | @@ -29,12 +26,12 @@ template: ASSISTANT: download_files: -- filename: bakllava.gguf - uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf -- filename: bakllava-mmproj.gguf - uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf +- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf + uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf +- filename: llava-v1.6-7b-mmproj-f16.gguf + uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf usage: | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "llava", + "model": "gpt-4-vision-preview", "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' diff --git a/go.mod b/go.mod index f2c53e84..8a43df1d 100644 --- a/go.mod +++ b/go.mod @@ -25,7 +25,7 @@ require ( github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 github.com/prometheus/client_golang v1.17.0 github.com/rs/zerolog v1.31.0 - github.com/sashabaranov/go-openai v1.16.0 + github.com/sashabaranov/go-openai v1.20.4 github.com/schollz/progressbar/v3 v3.13.1 github.com/stretchr/testify v1.8.4 github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701 @@ -53,21 +53,32 @@ require ( ) require ( + github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 // indirect github.com/Masterminds/goutils v1.1.1 // indirect github.com/Masterminds/semver/v3 v3.2.0 // indirect github.com/Masterminds/sprig/v3 v3.2.3 // indirect + github.com/Microsoft/go-winio v0.6.0 // indirect + github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect github.com/alecthomas/chroma v0.10.0 // indirect github.com/aymanbagabas/go-osc52 v1.0.3 // indirect github.com/aymerick/douceur v0.2.0 // indirect github.com/beorn7/perks v1.0.1 // indirect + github.com/cenkalti/backoff/v4 v4.1.3 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/charmbracelet/glamour v0.6.0 // indirect + github.com/containerd/continuity v0.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dlclark/regexp2 v1.8.1 // 
indirect + github.com/docker/cli v20.10.17+incompatible // indirect + github.com/docker/docker v20.10.7+incompatible // indirect + github.com/docker/go-connections v0.4.0 // indirect + github.com/docker/go-units v0.4.0 // indirect github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect github.com/go-logr/stdr v1.2.2 // indirect + github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/golang/snappy v0.0.2 // indirect + github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/gorilla/css v1.0.0 // indirect github.com/huandu/xstrings v1.3.3 // indirect github.com/klauspost/pgzip v1.2.5 // indirect @@ -76,26 +87,38 @@ require ( github.com/microcosm-cc/bluemonday v1.0.26 // indirect github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect github.com/mitchellh/copystructure v1.0.0 // indirect + github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/mitchellh/reflectwalk v1.0.0 // indirect + github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 // indirect github.com/muesli/reflow v0.3.0 // indirect github.com/muesli/termenv v0.13.0 // indirect github.com/nwaples/rardecode v1.1.0 // indirect github.com/olekukonko/tablewriter v0.0.5 // indirect + github.com/opencontainers/go-digest v1.0.0 // indirect + github.com/opencontainers/image-spec v1.0.2 // indirect + github.com/opencontainers/runc v1.1.5 // indirect + github.com/ory/dockertest/v3 v3.10.0 // indirect github.com/pierrec/lz4/v4 v4.1.2 // indirect + github.com/pkg/errors v0.9.1 // indirect github.com/pkoukk/tiktoken-go v0.1.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect github.com/prometheus/common v0.44.0 // indirect github.com/prometheus/procfs v0.11.1 // indirect github.com/shopspring/decimal v1.2.0 // indirect + github.com/sirupsen/logrus v1.8.1 // indirect github.com/spf13/cast v1.3.1 // indirect github.com/ulikunitz/xz v0.5.9 // indirect + github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect + github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect + github.com/xeipuuv/gojsonschema v1.2.0 // indirect github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect github.com/yuin/goldmark v1.5.2 // indirect github.com/yuin/goldmark-emoji v1.0.1 // indirect go.opentelemetry.io/otel/sdk v1.19.0 // indirect go.opentelemetry.io/otel/trace v1.19.0 // indirect golang.org/x/crypto v0.14.0 // indirect + golang.org/x/mod v0.12.0 // indirect golang.org/x/term v0.13.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect gopkg.in/fsnotify.v1 v1.4.7 // indirect diff --git a/go.sum b/go.sum index 7238ceba..bef84d57 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,6 @@ +github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 h1:w+iIsaOQNcT7OZ575w+acHgRric5iCyQh+xv+KJ4HB8= +github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf h1:UgjXLcE9I+VaVz7uBIlzAnyZIXwiDlIiTWqCh159aUI= github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf/go.mod h1:UOf2Mb/deUri5agct5OJ4SLWjhI+kZKbsUVUeRb24I0= github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= @@ -6,6 +9,10 @@ 
github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7Y github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA= github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM= +github.com/Microsoft/go-winio v0.6.0 h1:slsWYD/zyx7lCXoZVlvQrj0hPTM1HI4+v1sIda2yDvg= +github.com/Microsoft/go-winio v0.6.0/go.mod h1:cTAf44im0RAYeL23bpB+fzCyDH2MJiz2BO69KH/soAE= +github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEVMRuU21PR1EtLVZJmdB18Gu3Rw= +github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk= github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek= github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s= github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= @@ -17,27 +24,47 @@ github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuP github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cenkalti/backoff/v4 v4.1.3 h1:cFAlzYUlVYDysBEH2T5hyJZMh3+5+WCBvSnK6Q8UtC4= +github.com/cenkalti/backoff/v4 v4.1.3/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/charmbracelet/glamour v0.6.0 h1:wi8fse3Y7nfcabbbDuwolqTqMQPMnVPeZhDM273bISc= github.com/charmbracelet/glamour v0.6.0/go.mod h1:taqWV4swIMMbWALc0m7AfE9JkPSU8om2538k9ITBxOc= +github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA= +github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U= +github.com/containerd/continuity v0.3.0 h1:nisirsYROK15TAMVukJOUyGJjz4BNQJBVsNvAXZJ/eg= +github.com/containerd/continuity v0.3.0/go.mod h1:wJEAIwKOm/pBZuBd0JmeTvnLquTB1Ag8espWhkykbPM= +github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 
h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= github.com/dlclark/regexp2 v1.8.1 h1:6Lcdwya6GjPUNsBct8Lg/yRPwMhABj269AAzdGSiR+0= github.com/dlclark/regexp2 v1.8.1/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= +github.com/docker/cli v20.10.17+incompatible h1:eO2KS7ZFeov5UJeaDmIs1NFEDRf32PaqRpvoEkKBy5M= +github.com/docker/cli v20.10.17+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= +github.com/docker/docker v20.10.7+incompatible h1:Z6O9Nhsjv+ayUEeI1IojKbYcsGdgYSNqxe1s2MYzUhQ= +github.com/docker/docker v20.10.7+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= +github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= +github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= +github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df h1:qVcBEZlvp5A1gGWNJj02xyDtbsUI2hohlQMSB1fgER4= github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY= github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s= github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= +github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= @@ -66,8 +93,11 @@ github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg78 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gofiber/fiber/v2 v2.50.0 h1:ia0JaB+uw3GpNSCR5nvC5dsaxXjRU5OEu36aytx+zGw= github.com/gofiber/fiber/v2 v2.50.0/go.mod h1:21eytvay9Is7S6z+OgPi7c7n4++tnClWmhpimVHMimw= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= @@ -84,6 +114,7 @@ github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEW github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp 
v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= @@ -91,6 +122,8 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE= github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= +github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -109,6 +142,8 @@ github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= @@ -116,8 +151,11 @@ github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQs github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= @@ -148,8 +186,14 @@ github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2Em github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ= github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw= 
+github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= +github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/IfikLNY= github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= +github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU= +github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 h1:rzf0wL0CHVc8CEsgyygG0Mn9CNCCPZqOPaz8RiiHYQk= +github.com/moby/term v0.0.0-20201216013528-df9cb8a40635/go.mod h1:FBS0z0QWA44HXygs7VXDUOGoN/1TV3RuWkLO04am3wc= +github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 h1:OFVkSxR7CRSRSNm5dvpMRZwmSwWa8EMMnHbc84fW5tU= github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig= github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGwqBpN8N7BrSKC+nmdfw+9nPQIDyjHHlaIiitZI= @@ -180,6 +224,16 @@ github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1y github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= github.com/onsi/gomega v1.28.1 h1:MijcGUbfYuznzK/5R4CPNoUP/9Xvuo20sXfEm6XxoTA= github.com/onsi/gomega v1.28.1/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ= +github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= +github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= +github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrBg0D7ufOcFM= +github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= +github.com/opencontainers/runc v1.1.5 h1:L44KXEpKmfWDcS02aeGm8QNTFXTo2D+8MYGDIJ/GDEs= +github.com/opencontainers/runc v1.1.5/go.mod h1:1J5XiS+vdZ3wCyZybsuxXZWGrgSr8fFJHLXuG2PsnNg= +github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= +github.com/ory/dockertest/v3 v3.10.0 h1:4K3z2VMe8Woe++invjaTB7VRyQXQy5UY+loujO4aNE4= +github.com/ory/dockertest/v3 v3.10.0/go.mod h1:nr57ZbRWMqfsdGdFNLHz5jjNdDb7VVFnzAeW1n5N1Lg= github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg= github.com/otiai10/mint v1.6.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM= github.com/otiai10/openaigo v1.6.0 h1:YTQEbtDSvawETOB/Kmb/6JvuHdHH/eIpSQfHVufiwY8= @@ -188,6 +242,8 @@ github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 h1:Ii+DKncOVM8Cu1H github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5/go.mod h1:iIss55rKnNBTvrwdmkUpLnDpZoAHvWaiq5+iMmen4AE= github.com/pierrec/lz4/v4 v4.1.2 h1:qvY3YFXRQE/XB8MlLzJH7mSzBs74eA2gg52YTk6jUPM= github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkoukk/tiktoken-go v0.1.2 h1:u7PCSBiWJ3nJYoTGShyM9iHXz4dNyYkurwwp+GHtyHY= github.com/pkoukk/tiktoken-go v0.1.2/go.mod h1:boMWvk9pQCOTx11pgu0DrIdrAKgQzzJKUP6vLXaz7Rw= @@ -211,12 +267,16 @@ 
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUz github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A= github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= +github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sashabaranov/go-openai v1.16.0 h1:34W6WV84ey6OpW0p2UewZkdMu82AxGC+BzpU6iiauRw= github.com/sashabaranov/go-openai v1.16.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= +github.com/sashabaranov/go-openai v1.20.4 h1:095xQ/fAtRa0+Rj21sezVJABgKfGPNbyx/sAN/hJUmg= +github.com/sashabaranov/go-openai v1.20.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/schollz/progressbar/v3 v3.13.1 h1:o8rySDYiQ59Mwzy2FELeHY5ZARXZTVJC7iHD6PEFUiE= github.com/schollz/progressbar/v3 v3.13.1/go.mod h1:xvrbki8kfT1fzWzBT/UZd9L6GA+jdL7HAgq2RFnO6fQ= +github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= github.com/shirou/gopsutil/v3 v3.23.7/go.mod h1:c4gnmoRC0hQuaLqvxnx1//VXQ0Ms/X9UnJF8pddY5z4= github.com/shirou/gopsutil/v3 v3.23.9 h1:ZI5bWVeu2ep4/DIxB4U9okeYJ7zp/QLTO4auRb/ty/E= github.com/shirou/gopsutil/v3 v3.23.9/go.mod h1:x/NWSb71eMcjFIO0vhyGW5nZ7oSIgVjrCnADckb85GA= @@ -226,8 +286,12 @@ github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU= github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k= github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ= github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= +github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= +github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= +github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng= github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -240,6 +304,7 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI= github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU= github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= @@ -251,6 +316,7 @@ github.com/tmc/langchaingo 
v0.0.0-20231019140956-c636b3da7701/go.mod h1:SiwyRS7s github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/ulikunitz/xz v0.5.9 h1:RsKRIA2MO8x56wkkcd3LbtcE/uMszhb6DpRf+3uwa3I= github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= +github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/urfave/cli/v2 v2.25.7 h1:VAzn5oq403l5pHjc4OhD54+XGO9cdKVL/7lDjF+iKUs= github.com/urfave/cli/v2 v2.25.7/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= @@ -259,10 +325,19 @@ github.com/valyala/fasthttp v1.50.0 h1:H7fweIlBm0rXLs2q0XbalvJ6r0CUPFWK3/bB4N13e github.com/valyala/fasthttp v1.50.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA= github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8= github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= +github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= +github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= +github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f h1:J9EGpcZtP0E/raorCMxlFGSTBrsSlaDGf3jU/qvAE2c= +github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= +github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0= +github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= +github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74= +github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos= github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yuin/goldmark v1.5.2 h1:ALmeCk/px5FSm1MAcFBAsVKZjDuMVj8Tm7FFIlMJnqU= @@ -290,15 +365,19 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc= golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 
+golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= @@ -309,23 +388,32 @@ golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200831180312-196b9ba8737a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 
golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -354,8 +442,11 @@ golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190624222133-a101b041ded4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.12.0 h1:YW6HUoUmYBpwSgyaGaZq1fHjrBjX1rlpZ54T6mu2kss= golang.org/x/tools v0.12.0/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM= @@ -375,6 +466,7 @@ google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzi google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -393,3 +485,4 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk= diff --git a/tests/e2e-aio/e2e_suite_test.go b/tests/e2e-aio/e2e_suite_test.go new file mode 100644 index 00000000..00fc6d2a --- /dev/null +++ 
b/tests/e2e-aio/e2e_suite_test.go @@ -0,0 +1,97 @@ +package e2e_test + +import ( + "context" + "fmt" + "os" + "runtime" + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/ory/dockertest/v3" + "github.com/ory/dockertest/v3/docker" + "github.com/sashabaranov/go-openai" +) + +var pool *dockertest.Pool +var resource *dockertest.Resource +var client *openai.Client + +var containerImage = os.Getenv("LOCALAI_IMAGE") +var containerImageTag = os.Getenv("LOCALAI_IMAGE_TAG") +var modelsDir = os.Getenv("LOCALAI_MODELS_DIR") +var apiPort = os.Getenv("LOCALAI_API_PORT") + +func TestLocalAI(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "LocalAI E2E test suite") +} + +var _ = BeforeSuite(func() { + + if containerImage == "" { + Fail("LOCALAI_IMAGE is not set") + } + if containerImageTag == "" { + Fail("LOCALAI_IMAGE_TAG is not set") + } + if apiPort == "" { + apiPort = "8080" + } + + p, err := dockertest.NewPool("") + Expect(err).To(Not(HaveOccurred())) + Expect(p.Client.Ping()).To(Succeed()) + + pool = p + + // get cwd + cwd, err := os.Getwd() + Expect(err).To(Not(HaveOccurred())) + md := cwd + "/models" + + if modelsDir != "" { + md = modelsDir + } + + proc := runtime.NumCPU() + options := &dockertest.RunOptions{ + Repository: containerImage, + Tag: containerImageTag, + // Cmd: []string{"server", "/data"}, + PortBindings: map[docker.Port][]docker.PortBinding{ + "8080/tcp": []docker.PortBinding{{HostPort: apiPort}}, + }, + Env: []string{"MODELS_PATH=/models", "DEBUG=true", "THREADS=" + fmt.Sprint(proc)}, + Mounts: []string{md + ":/models"}, + } + + r, err := pool.RunWithOptions(options) + Expect(err).To(Not(HaveOccurred())) + + resource = r + + defaultConfig := openai.DefaultConfig("") + defaultConfig.BaseURL = "http://localhost:" + apiPort + "/v1" + + // Wait for API to be ready + client = openai.NewClientWithConfig(defaultConfig) + + Eventually(func() error { + _, err := client.ListModels(context.TODO()) + return err + }, "20m").ShouldNot(HaveOccurred()) +}) + +var _ = AfterSuite(func() { + Expect(pool.Purge(resource)).To(Succeed()) + //dat, err := os.ReadFile(resource.Container.LogPath) + //Expect(err).To(Not(HaveOccurred())) + //Expect(string(dat)).To(ContainSubstring("GRPC Service Ready")) + //fmt.Println(string(dat)) +}) + +var _ = AfterEach(func() { + //Expect(dbClient.Clear()).To(Succeed()) +}) diff --git a/tests/e2e-aio/e2e_test.go b/tests/e2e-aio/e2e_test.go new file mode 100644 index 00000000..03d9fda9 --- /dev/null +++ b/tests/e2e-aio/e2e_test.go @@ -0,0 +1,152 @@ +package e2e_test + +import ( + "context" + "fmt" + "io" + "net/http" + "os" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "github.com/sashabaranov/go-openai" +) + +var _ = Describe("E2E test", func() { + Context("Generating", func() { + BeforeEach(func() { + // + }) + + // Check that the GPU was used + AfterEach(func() { + // + }) + + Context("text", func() { + It("correctly", func() { + model := "gpt-4" + resp, err := client.CreateChatCompletion(context.TODO(), + openai.ChatCompletionRequest{ + Model: model, Messages: []openai.ChatCompletionMessage{ + { + Role: "user", + Content: "How much is 2+2?", + }, + }}) + Expect(err).ToNot(HaveOccurred()) + Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp)) + Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(resp.Choices[0].Message.Content)) + }) + }) + Context("images", func() { + It("correctly", func() { + resp, err := client.CreateImage(context.TODO(), + openai.ImageRequest{ + Prompt: "test", + Size: openai.CreateImageSize512x512, + //ResponseFormat: openai.CreateImageResponseFormatURL, + }, + ) + Expect(err).ToNot(HaveOccurred()) + Expect(len(resp.Data)).To(Equal(1), fmt.Sprint(resp)) + Expect(resp.Data[0].URL).To(ContainSubstring("http://localhost:8080"), fmt.Sprint(resp.Data[0].URL)) + }) + }) + Context("embeddings", func() { + It("correctly", func() { + resp, err := client.CreateEmbeddings(context.TODO(), + openai.EmbeddingRequestStrings{ + Input: []string{"doc"}, + Model: openai.AdaEmbeddingV2, + }, + ) + Expect(err).ToNot(HaveOccurred()) + Expect(len(resp.Data)).To(Equal(1), fmt.Sprint(resp)) + Expect(resp.Data[0].Embedding).ToNot(BeEmpty()) + }) + }) + Context("vision", func() { + It("correctly", func() { + model := "gpt-4-vision-preview" + resp, err := client.CreateChatCompletion(context.TODO(), + openai.ChatCompletionRequest{ + Model: model, Messages: []openai.ChatCompletionMessage{ + { + + Role: "user", + MultiContent: []openai.ChatMessagePart{ + { + Type: openai.ChatMessagePartTypeText, + Text: "What is in the image?", + }, + { + Type: openai.ChatMessagePartTypeImageURL, + ImageURL: &openai.ChatMessageImageURL{ + URL: "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg", + Detail: openai.ImageURLDetailLow, + }, + }, + }, + }, + }}) + Expect(err).ToNot(HaveOccurred()) + Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp)) + Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("wooden"), ContainSubstring("grass")), fmt.Sprint(resp.Choices[0].Message.Content)) + }) + }) + Context("text to audio", func() { + It("correctly", func() { + res, err := client.CreateSpeech(context.Background(), openai.CreateSpeechRequest{ + Model: openai.TTSModel1, + Input: "Hello!", + Voice: openai.VoiceAlloy, + }) + Expect(err).ToNot(HaveOccurred()) + defer res.Close() + + _, err = io.ReadAll(res) + Expect(err).ToNot(HaveOccurred()) + + }) + }) + Context("audio to text", func() { + It("correctly", func() { + + downloadURL := "https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav" + file, err := downloadHttpFile(downloadURL) + Expect(err).ToNot(HaveOccurred()) + + req := openai.AudioRequest{ + Model: openai.Whisper1, + FilePath: file, + } + resp, err := client.CreateTranscription(context.Background(), req) + Expect(err).ToNot(HaveOccurred()) + Expect(resp.Text).To(ContainSubstring("This is the"), fmt.Sprint(resp.Text)) + }) + }) + }) +}) + +func downloadHttpFile(url string) (string, error) { + resp, err := http.Get(url) + if err != nil { + return "", err + } 
+ defer resp.Body.Close() + + tmpfile, err := os.CreateTemp("", "example") + if err != nil { + return "", err + } + defer tmpfile.Close() + + _, err = io.Copy(tmpfile, resp.Body) + if err != nil { + return "", err + } + + return tmpfile.Name(), nil +} From 643d85d2cc3741d5e461302e7fc5c1f75bb2b8a0 Mon Sep 17 00:00:00 2001 From: Richard Palethorpe Date: Fri, 22 Mar 2024 20:14:04 +0000 Subject: [PATCH 0173/2895] feat(stores): Vector store backend (#1795) Add simple vector store backend Signed-off-by: Richard Palethorpe --- .editorconfig | 31 + Makefile | 13 +- backend/backend.proto | 46 +- backend/go/stores/debug.go | 14 + backend/go/stores/main.go | 26 + backend/go/stores/production.go | 7 + backend/go/stores/store.go | 507 +++++++ core/backend/stores.go | 23 + core/http/api.go | 7 + core/http/api_test.go | 142 ++ core/http/endpoints/localai/stores.go | 121 ++ core/schema/localai.go | 37 + docs/content/docs/features/stores.md | 97 ++ docs/content/docs/overview.md | 1 + examples/semantic-todo/README.md | 15 + examples/semantic-todo/go.mod | 18 + examples/semantic-todo/go.sum | 50 + examples/semantic-todo/main.go | 352 +++++ pkg/grpc/backend.go | 5 + pkg/grpc/base/base.go | 16 + pkg/grpc/client.go | 64 + pkg/grpc/embed.go | 16 + pkg/grpc/interface.go | 5 + pkg/grpc/proto/backend.pb.go | 1426 +++++++++++++------ pkg/grpc/proto/backend_grpc.pb.go | 201 ++- pkg/grpc/server.go | 48 + pkg/model/initializers.go | 3 + pkg/store/client.go | 155 ++ tests/integration/integration_suite_test.go | 17 + tests/integration/stores_test.go | 228 +++ 30 files changed, 3250 insertions(+), 441 deletions(-) create mode 100644 .editorconfig create mode 100644 backend/go/stores/debug.go create mode 100644 backend/go/stores/main.go create mode 100644 backend/go/stores/production.go create mode 100644 backend/go/stores/store.go create mode 100644 core/backend/stores.go create mode 100644 core/http/endpoints/localai/stores.go create mode 100644 docs/content/docs/features/stores.md create mode 100644 examples/semantic-todo/README.md create mode 100644 examples/semantic-todo/go.mod create mode 100644 examples/semantic-todo/go.sum create mode 100644 examples/semantic-todo/main.go create mode 100644 pkg/store/client.go create mode 100644 tests/integration/integration_suite_test.go create mode 100644 tests/integration/stores_test.go diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..b66f3645 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,31 @@ + +root = true + +[*] +indent_style = space +indent_size = 2 +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true + +[*.go] +indent_style = tab + +[Makefile] +indent_style = tab + +[*.proto] +indent_size = 2 + +[*.py] +indent_size = 4 + +[*.js] +indent_size = 2 + +[*.yaml] +indent_size = 2 + +[*.md] +trim_trailing_whitespace = false diff --git a/Makefile b/Makefile index 95af1936..94b5570c 100644 --- a/Makefile +++ b/Makefile @@ -159,6 +159,7 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper +ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC) GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC) @@ -333,7 +334,7 @@ prepare-test: grpcs test: prepare test-models/testmodel.ggml grpcs @echo 'Running tests' - export GO_TAGS="tts stablediffusion" + export GO_TAGS="tts stablediffusion debug" $(MAKE) prepare-test HUGGINGFACE_GRPC=$(abspath 
./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS) @@ -387,6 +388,11 @@ test-stablediffusion: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts 1 -v -r $(TEST_PATHS) +test-stores: backend-assets/grpc/local-store + mkdir -p tests/integration/backend-assets/grpc + cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/ + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts 1 -v -r tests/integration + test-container: docker build --target requirements -t local-ai-test-container . docker run -ti --rm --entrypoint /bin/bash -ti -v $(abspath ./):/build local-ai-test-container @@ -536,6 +542,9 @@ backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper. CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/ +backend-assets/grpc/local-store: backend-assets/grpc + $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/ + grpcs: prepare $(GRPC_BACKENDS) DOCKER_IMAGE?=local-ai @@ -573,4 +582,4 @@ docker-image-intel-xpu: --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="none" \ - --build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) . \ No newline at end of file + --build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) . 
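For orientation, the pieces this commit adds compose as follows: the Makefile rules above build the `local-store` gRPC backend and wire it into `test-stores`, the proto changes below define the store RPCs, and the `pkg/store` helpers drive them from Go. A minimal sketch of the internal usage, with function signatures inferred from the HTTP endpoint code later in this patch; the loader and config setup here are assumptions for illustration, not part of the patch:

```go
package main

import (
    "context"
    "fmt"

    "github.com/go-skynet/LocalAI/core/backend"
    "github.com/go-skynet/LocalAI/core/config"
    "github.com/go-skynet/LocalAI/pkg/model"
    "github.com/go-skynet/LocalAI/pkg/store"
)

func main() {
    // Assumed setup, mirroring what core/http/api.go does for the /stores
    // endpoints; AssetsDestination would need to point at extracted backend assets.
    sl := model.NewModelLoader("")
    appConfig := &config.ApplicationConfig{}

    // Resolve (and lazily start) the local-store gRPC backend.
    sb, err := backend.StoreBackend(sl, appConfig, "example_store")
    if err != nil {
        panic(err)
    }

    ctx := context.Background()

    // Columnar insert: parallel arrays of keys and values.
    keys := [][]float32{{0.1, 0.2}, {0.3, 0.4}}
    vals := [][]byte{[]byte("foo"), []byte("bar")}
    if err := store.SetCols(ctx, sb, keys, vals); err != nil {
        panic(err)
    }

    // Top-1 similarity search around a query vector.
    ks, vs, sims, err := store.Find(ctx, sb, []float32{0.1, 0.2}, 1)
    if err != nil {
        panic(err)
    }
    fmt.Println(ks, vs, sims)
}
```

The HTTP endpoints added further down are a thin JSON translation layer over these same calls.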
diff --git a/backend/backend.proto b/backend/backend.proto
index 30e2f8b2..c3d3180b 100644
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -18,6 +18,48 @@ service Backend {
   rpc TTS(TTSRequest) returns (Result) {}
   rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
   rpc Status(HealthMessage) returns (StatusResponse) {}
+
+  rpc StoresSet(StoresSetOptions) returns (Result) {}
+  rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
+  rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
+  rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
+}
+
+message StoresKey {
+  repeated float Floats = 1;
+}
+
+message StoresValue {
+  bytes Bytes = 1;
+}
+
+message StoresSetOptions {
+  repeated StoresKey Keys = 1;
+  repeated StoresValue Values = 2;
+}
+
+message StoresDeleteOptions {
+  repeated StoresKey Keys = 1;
+}
+
+message StoresGetOptions {
+  repeated StoresKey Keys = 1;
+}
+
+message StoresGetResult {
+  repeated StoresKey Keys = 1;
+  repeated StoresValue Values = 2;
+}
+
+message StoresFindOptions {
+  StoresKey Key = 1;
+  int32 TopK = 2;
+}
+
+message StoresFindResult {
+  repeated StoresKey Keys = 1;
+  repeated StoresValue Values = 2;
+  repeated float Similarities = 3;
 }
 
 message HealthMessage {}
@@ -121,7 +163,7 @@ message ModelOptions {
   bool NoMulMatQ = 37;
   string DraftModel = 39;
-
+
   string AudioPath = 38;
 
   // vllm
@@ -213,4 +255,4 @@ message StatusResponse {
   }
   State state = 1;
   MemoryUsageData memory = 2;
-}
\ No newline at end of file
+}
diff --git a/backend/go/stores/debug.go b/backend/go/stores/debug.go
new file mode 100644
index 00000000..6f0b8ba8
--- /dev/null
+++ b/backend/go/stores/debug.go
@@ -0,0 +1,14 @@
+//go:build debug
+// +build debug
+
+package main
+
+import (
+    "github.com/rs/zerolog/log"
+)
+
+func assert(cond bool, msg string) {
+    if !cond {
+        log.Fatal().Stack().Msg(msg)
+    }
+}
diff --git a/backend/go/stores/main.go b/backend/go/stores/main.go
new file mode 100644
index 00000000..9a113d79
--- /dev/null
+++ b/backend/go/stores/main.go
@@ -0,0 +1,26 @@
+package main
+
+// Note: this is started internally by LocalAI and a server is allocated for each store
+
+import (
+    "flag"
+    "os"
+
+    grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+    "github.com/rs/zerolog"
+    "github.com/rs/zerolog/log"
+)
+
+var (
+    addr = flag.String("addr", "localhost:50051", "the address to connect to")
+)
+
+func main() {
+    log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
+
+    flag.Parse()
+
+    if err := grpc.StartServer(*addr, NewStore()); err != nil {
+        panic(err)
+    }
+}
diff --git a/backend/go/stores/production.go b/backend/go/stores/production.go
new file mode 100644
index 00000000..418b6397
--- /dev/null
+++ b/backend/go/stores/production.go
@@ -0,0 +1,7 @@
+//go:build !debug
+// +build !debug
+
+package main
+
+func assert(cond bool, msg string) {
+}
diff --git a/backend/go/stores/store.go b/backend/go/stores/store.go
new file mode 100644
index 00000000..9be31df8
--- /dev/null
+++ b/backend/go/stores/store.go
@@ -0,0 +1,507 @@
+package main
+
+// This is a wrapper to satisfy the gRPC service interface.
+// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
+import (
+    "container/heap"
+    "fmt"
+    "math"
+    "slices"
+
+    "github.com/go-skynet/LocalAI/pkg/grpc/base"
+    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+
+    "github.com/rs/zerolog/log"
+)
+
+type Store struct {
+    base.SingleThread
+
+    // The sorted keys
+    keys [][]float32
+    // The sorted values
+    values [][]byte
+
+    // If for every K it holds that ||k||^2 = 1, then we can use the normalized distance functions
+    // TODO: Should we instead normalize incoming keys if they are not?
+    keysAreNormalized bool
+    // The first key decides the length of the keys
+    keyLen int
+}
+
+// TODO: Only used for sorting using Go's builtin implementation. The interfaces are columnar because
+// that's theoretically best for memory layout and cache locality, but this isn't optimized yet.
+type Pair struct {
+    Key   []float32
+    Value []byte
+}
+
+func NewStore() *Store {
+    return &Store{
+        keys:              make([][]float32, 0),
+        values:            make([][]byte, 0),
+        keysAreNormalized: true,
+        keyLen:            -1,
+    }
+}
+
+func compareSlices(k1, k2 []float32) int {
+    assert(len(k1) == len(k2), fmt.Sprintf("compareSlices: len(k1) = %d, len(k2) = %d", len(k1), len(k2)))
+
+    return slices.Compare(k1, k2)
+}
+
+func hasKey(unsortedSlice [][]float32, target []float32) bool {
+    return slices.ContainsFunc(unsortedSlice, func(k []float32) bool {
+        return compareSlices(k, target) == 0
+    })
+}
+
+func findInSortedSlice(sortedSlice [][]float32, target []float32) (int, bool) {
+    return slices.BinarySearchFunc(sortedSlice, target, func(k, t []float32) int {
+        return compareSlices(k, t)
+    })
+}
+
+func isSortedPairs(kvs []Pair) bool {
+    for i := 1; i < len(kvs); i++ {
+        if compareSlices(kvs[i-1].Key, kvs[i].Key) > 0 {
+            return false
+        }
+    }
+
+    return true
+}
+
+func isSortedKeys(keys [][]float32) bool {
+    for i := 1; i < len(keys); i++ {
+        if compareSlices(keys[i-1], keys[i]) > 0 {
+            return false
+        }
+    }
+
+    return true
+}
+
+func sortIntoKeySlices(keys []*pb.StoresKey) [][]float32 {
+    ks := make([][]float32, len(keys))
+
+    for i, k := range keys {
+        ks[i] = k.Floats
+    }
+
+    slices.SortFunc(ks, compareSlices)
+
+    assert(len(ks) == len(keys), fmt.Sprintf("len(ks) = %d, len(keys) = %d", len(ks), len(keys)))
+    assert(isSortedKeys(ks), "keys are not sorted")
+
+    return ks
+}
+
+func (s *Store) Load(opts *pb.ModelOptions) error {
+    return nil
+}
+
+// Sort the incoming kvs and merge them with the existing sorted kvs
+func (s *Store) StoresSet(opts *pb.StoresSetOptions) error {
+    if len(opts.Keys) == 0 {
+        return fmt.Errorf("no keys to add")
+    }
+
+    if len(opts.Keys) != len(opts.Values) {
+        return fmt.Errorf("len(keys) = %d, len(values) = %d", len(opts.Keys), len(opts.Values))
+    }
+
+    if s.keyLen == -1 {
+        s.keyLen = len(opts.Keys[0].Floats)
+    } else {
+        if len(opts.Keys[0].Floats) != s.keyLen {
+            return fmt.Errorf("trying to add a key with length %d when the existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
+        }
+    }
+
+    kvs := make([]Pair, len(opts.Keys))
+
+    for i, k := range opts.Keys {
+        if s.keysAreNormalized && !isNormalized(k.Floats) {
+            s.keysAreNormalized = false
+            var sample []float32
+            if len(k.Floats) > 5 {
+                sample = k.Floats[:5]
+            } else {
+                sample = k.Floats
+            }
+            log.Debug().Msgf("Key is not normalized: %v", sample)
+        }
+
+        kvs[i] = Pair{
+            Key:   k.Floats,
+            Value: opts.Values[i].Bytes,
+        }
+    }
+
+    slices.SortFunc(kvs, func(a, b Pair) int {
+        return compareSlices(a.Key, b.Key)
+    })
+
+    assert(len(kvs) == len(opts.Keys), fmt.Sprintf("len(kvs) = %d, len(opts.Keys) = %d", len(kvs), len(opts.Keys)))
+    assert(isSortedPairs(kvs), "keys are not sorted")
+
+    l := len(kvs) + len(s.keys)
+    merge_ks := make([][]float32, 0, l)
+    merge_vs := make([][]byte, 0, l)
+
+    i, j := 0, 0
+    for {
+        if i+j >= l {
+            break
+        }
+
+        if i >= len(kvs) {
+            merge_ks = append(merge_ks, s.keys[j])
+            merge_vs = append(merge_vs, s.values[j])
+            j++
+            continue
+        }
+
+        if j >= len(s.keys) {
+            merge_ks = append(merge_ks, kvs[i].Key)
+            merge_vs = append(merge_vs, kvs[i].Value)
+            i++
+            continue
+        }
+
+        c := compareSlices(kvs[i].Key, s.keys[j])
+        if c < 0 {
+            merge_ks = append(merge_ks, kvs[i].Key)
+            merge_vs = append(merge_vs, kvs[i].Value)
+            i++
+        } else if c > 0 {
+            merge_ks = append(merge_ks, s.keys[j])
+            merge_vs = append(merge_vs, s.values[j])
+            j++
+        } else {
+            merge_ks = append(merge_ks, kvs[i].Key)
+            merge_vs = append(merge_vs, kvs[i].Value)
+            i++
+            j++
+        }
+    }
+
+    assert(len(merge_ks) <= l, fmt.Sprintf("len(merge_ks) = %d, l = %d", len(merge_ks), l))
+    assert(isSortedKeys(merge_ks), "merge keys are not sorted")
+
+    s.keys = merge_ks
+    s.values = merge_vs
+
+    return nil
+}
+
+func (s *Store) StoresDelete(opts *pb.StoresDeleteOptions) error {
+    if len(opts.Keys) == 0 {
+        return fmt.Errorf("no keys to delete")
+    }
+
+    if s.keyLen == -1 {
+        s.keyLen = len(opts.Keys[0].Floats)
+    } else {
+        if len(opts.Keys[0].Floats) != s.keyLen {
+            return fmt.Errorf("trying to delete a key with length %d when the existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
+        }
+    }
+
+    ks := sortIntoKeySlices(opts.Keys)
+
+    l := len(s.keys) - len(ks)
+    if l < 0 {
+        // More keys to delete than exist; the capacity below must not be negative.
+        l = 0
+    }
+    merge_ks := make([][]float32, 0, l)
+    merge_vs := make([][]byte, 0, l)
+
+    tail_ks := s.keys
+    tail_vs := s.values
+    for _, k := range ks {
+        j, found := findInSortedSlice(tail_ks, k)
+
+        if found {
+            merge_ks = append(merge_ks, tail_ks[:j]...)
+            merge_vs = append(merge_vs, tail_vs[:j]...)
+            tail_ks = tail_ks[j+1:]
+            tail_vs = tail_vs[j+1:]
+        } else {
+            assert(!hasKey(s.keys, k), fmt.Sprintf("Key exists, but was not found: t=%d, %v", len(tail_ks), k))
+        }
+
+        log.Debug().Msgf("Delete: found = %v, t = %d, j = %d, len(merge_ks) = %d, len(merge_vs) = %d", found, len(tail_ks), j, len(merge_ks), len(merge_vs))
+    }
+
+    merge_ks = append(merge_ks, tail_ks...)
+    merge_vs = append(merge_vs, tail_vs...)
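+
+    // The asserts below are active only in debug builds (see debug.go and
+    // production.go): they check that a delete never grows the store, keeps
+    // the keys sorted, and leaves none of the requested keys behind.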
+    assert(len(merge_ks) <= len(s.keys), fmt.Sprintf("len(merge_ks) = %d, len(s.keys) = %d", len(merge_ks), len(s.keys)))
+
+    s.keys = merge_ks
+    s.values = merge_vs
+
+    assert(len(s.keys) >= l, fmt.Sprintf("len(s.keys) = %d, l = %d", len(s.keys), l))
+    assert(isSortedKeys(s.keys), "keys are not sorted")
+    assert(func() bool {
+        for _, k := range ks {
+            if _, found := findInSortedSlice(s.keys, k); found {
+                return false
+            }
+        }
+        return true
+    }(), "Keys to delete still present")
+
+    if len(s.keys) != l {
+        log.Debug().Msgf("Delete: Some keys not found: len(s.keys) = %d, l = %d", len(s.keys), l)
+    }
+
+    return nil
+}
+
+func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error) {
+    if len(opts.Keys) == 0 {
+        return pb.StoresGetResult{}, fmt.Errorf("no keys to get")
+    }
+
+    pbKeys := make([]*pb.StoresKey, 0, len(opts.Keys))
+    pbValues := make([]*pb.StoresValue, 0, len(opts.Keys))
+    ks := sortIntoKeySlices(opts.Keys)
+
+    if len(s.keys) == 0 {
+        log.Debug().Msgf("Get: No keys in store")
+    }
+
+    if s.keyLen == -1 {
+        s.keyLen = len(opts.Keys[0].Floats)
+    } else {
+        if len(opts.Keys[0].Floats) != s.keyLen {
+            return pb.StoresGetResult{}, fmt.Errorf("trying to get a key with length %d when the existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
+        }
+    }
+
+    tail_k := s.keys
+    tail_v := s.values
+    for i, k := range ks {
+        j, found := findInSortedSlice(tail_k, k)
+
+        if found {
+            pbKeys = append(pbKeys, &pb.StoresKey{
+                Floats: k,
+            })
+            pbValues = append(pbValues, &pb.StoresValue{
+                Bytes: tail_v[j],
+            })
+
+            tail_k = tail_k[j+1:]
+            tail_v = tail_v[j+1:]
+        } else {
+            assert(!hasKey(s.keys, k), fmt.Sprintf("Key exists, but was not found: i=%d, %v", i, k))
+        }
+    }
+
+    if len(pbKeys) != len(opts.Keys) {
+        log.Debug().Msgf("Get: Some keys not found: len(pbKeys) = %d, len(opts.Keys) = %d, len(s.Keys) = %d", len(pbKeys), len(opts.Keys), len(s.keys))
+    }
+
+    return pb.StoresGetResult{
+        Keys:   pbKeys,
+        Values: pbValues,
+    }, nil
+}
+
+// A key counts as normalized when its L2 norm is 1, i.e. the sum of the
+// squares of its components is 1, within a small tolerance for float error.
+func isNormalized(k []float32) bool {
+    var sum float64
+    for _, v := range k {
+        sum += float64(v) * float64(v)
+    }
+
+    return math.Abs(sum-1.0) < 1e-4
+}
+
+// TODO: This we could replace with handwritten SIMD code
+func normalizedCosineSimilarity(k1, k2 []float32) float32 {
+    assert(len(k1) == len(k2), fmt.Sprintf("normalizedCosineSimilarity: len(k1) = %d, len(k2) = %d", len(k1), len(k2)))
+
+    var dot float32
+    for i := 0; i < len(k1); i++ {
+        dot += k1[i] * k2[i]
+    }
+
+    assert(dot >= -1 && dot <= 1, fmt.Sprintf("dot = %f", dot))
+
+    // 2.0 * (1.0 - dot) would be the squared Euclidean distance
+    return dot
+}
+
+type PriorityItem struct {
+    Similarity float32
+    Key        []float32
+    Value      []byte
+}
+
+type PriorityQueue []*PriorityItem
+
+func (pq PriorityQueue) Len() int { return len(pq) }
+
+func (pq PriorityQueue) Less(i, j int) bool {
+    // Min-heap on similarity: the least similar item sits at the root so it
+    // can be evicted cheaply while scanning for the top-k
+    return pq[i].Similarity < pq[j].Similarity
+}
+
+func (pq PriorityQueue) Swap(i, j int) {
+    pq[i], pq[j] = pq[j], pq[i]
+}
+
+func (pq *PriorityQueue) Push(x any) {
+    item := x.(*PriorityItem)
+    *pq = append(*pq, item)
+}
+
+func (pq *PriorityQueue) Pop() any {
+    old := *pq
+    n := len(old)
+    item := old[n-1]
+    *pq = old[0 : n-1]
+    return item
+}
+
+func (s *Store) StoresFindNormalized(opts *pb.StoresFindOptions) (pb.StoresFindResult, error) {
+    tk := opts.Key.Floats
+    top_ks := make(PriorityQueue, 0, int(opts.TopK))
+    heap.Init(&top_ks)
+
+    for i, k := range s.keys {
+        sim := normalizedCosineSimilarity(tk, k)
+        heap.Push(&top_ks, &PriorityItem{
+            Similarity: sim,
+            Key:        k,
+            Value:      s.values[i],
+        })
+
+        if top_ks.Len() > int(opts.TopK) {
+            heap.Pop(&top_ks)
+        }
+    }
+
+    similarities := make([]float32, top_ks.Len())
+    pbKeys := make([]*pb.StoresKey, top_ks.Len())
+    pbValues := make([]*pb.StoresValue, top_ks.Len())
+
+    for i := top_ks.Len() - 1; i >= 0; i-- {
+        item := heap.Pop(&top_ks).(*PriorityItem)
+
+        similarities[i] = item.Similarity
+        pbKeys[i] = &pb.StoresKey{
+            Floats: item.Key,
+        }
+        pbValues[i] = &pb.StoresValue{
+            Bytes: item.Value,
+        }
+    }
+
+    return pb.StoresFindResult{
+        Keys:         pbKeys,
+        Values:       pbValues,
+        Similarities: similarities,
+    }, nil
+}
+
+func cosineSimilarity(k1, k2 []float32, mag1 float64) float32 {
+    assert(len(k1) == len(k2), fmt.Sprintf("cosineSimilarity: len(k1) = %d, len(k2) = %d", len(k1), len(k2)))
+
+    var dot, mag2 float64
+    for i := 0; i < len(k1); i++ {
+        dot += float64(k1[i] * k2[i])
+        mag2 += float64(k2[i] * k2[i])
+    }
+
+    sim := float32(dot / (mag1 * math.Sqrt(mag2)))
+
+    assert(sim >= -1 && sim <= 1, fmt.Sprintf("sim = %f", sim))
+
+    return sim
+}
+
+func (s *Store) StoresFindFallback(opts *pb.StoresFindOptions) (pb.StoresFindResult, error) {
+    tk := opts.Key.Floats
+    top_ks := make(PriorityQueue, 0, int(opts.TopK))
+    heap.Init(&top_ks)
+
+    var mag1 float64
+    for _, v := range tk {
+        mag1 += float64(v * v)
+    }
+    mag1 = math.Sqrt(mag1)
+
+    for i, k := range s.keys {
+        dist := cosineSimilarity(tk, k, mag1)
+        heap.Push(&top_ks, &PriorityItem{
+            Similarity: dist,
+            Key:        k,
+            Value:      s.values[i],
+        })
+
+        if top_ks.Len() > int(opts.TopK) {
+            heap.Pop(&top_ks)
+        }
+    }
+
+    similarities := make([]float32, top_ks.Len())
+    pbKeys := make([]*pb.StoresKey, top_ks.Len())
+    pbValues := make([]*pb.StoresValue, top_ks.Len())
+
+    for i := top_ks.Len() - 1; i >= 0; i-- {
+        item := heap.Pop(&top_ks).(*PriorityItem)
+
+        similarities[i] = item.Similarity
+        pbKeys[i] = &pb.StoresKey{
+            Floats: item.Key,
+        }
+        pbValues[i] = &pb.StoresValue{
+            Bytes: item.Value,
+        }
+    }
+
+    return pb.StoresFindResult{
+        Keys:         pbKeys,
+        Values:       pbValues,
+        Similarities: similarities,
+    }, nil
+}
+
+func (s *Store) StoresFind(opts *pb.StoresFindOptions) (pb.StoresFindResult, error) {
+    tk := opts.Key.Floats
+
+    if opts.TopK < 1 {
+        return pb.StoresFindResult{}, fmt.Errorf("opts.TopK = %d, must be >= 1", opts.TopK)
+    }
+
+    if s.keyLen == -1 {
+        s.keyLen = len(tk)
+    } else if len(tk) != s.keyLen {
+        return pb.StoresFindResult{}, fmt.Errorf("trying to find a key with length %d when the existing length is %d", len(tk), s.keyLen)
+    }
+
+    if s.keysAreNormalized && isNormalized(tk) {
+        return s.StoresFindNormalized(opts)
+    } else {
+        if s.keysAreNormalized {
+            var sample []float32
+            if len(tk) > 5 {
+                sample = tk[:5]
+            } else {
+                sample = tk
+            }
+            log.Debug().Msgf("Trying to compare non-normalized key with normalized keys: %v", sample)
+        }
+
+        return s.StoresFindFallback(opts)
+    }
+}
diff --git a/core/backend/stores.go b/core/backend/stores.go
new file mode 100644
index 00000000..7b69d1bd
--- /dev/null
+++ b/core/backend/stores.go
@@ -0,0 +1,23 @@
+package backend
+
+import (
+    "github.com/go-skynet/LocalAI/core/config"
+
+    "github.com/go-skynet/LocalAI/pkg/grpc"
+    "github.com/go-skynet/LocalAI/pkg/model"
+)
+
+func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) (grpc.Backend, error) {
+    if storeName == "" {
+        storeName = "default"
+    }
+
+    sc := []model.Option{
+        model.WithBackendString(model.LocalStoreBackend),
model.WithAssetDir(appConfig.AssetsDestination), + model.WithModel(storeName), + } + + return sl.BackendLoader(sc...) +} + diff --git a/core/http/api.go b/core/http/api.go index 8578b89e..039e835b 100644 --- a/core/http/api.go +++ b/core/http/api.go @@ -172,6 +172,13 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi // Elevenlabs app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig)) + // Stores + sl := model.NewModelLoader("") + app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig)) + app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig)) + app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig)) + app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig)) + // openAI compatible API endpoint // chat diff --git a/core/http/api_test.go b/core/http/api_test.go index ca69e8bf..804c15fe 100644 --- a/core/http/api_test.go +++ b/core/http/api_test.go @@ -15,6 +15,7 @@ import ( "github.com/go-skynet/LocalAI/core/config" . "github.com/go-skynet/LocalAI/core/http" + "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/core/startup" "github.com/go-skynet/LocalAI/pkg/downloader" @@ -122,6 +123,75 @@ func postModelApplyRequest(url string, request modelApplyRequest) (response map[ return } +func postRequestJSON[B any](url string, bodyJson *B) error { + payload, err := json.Marshal(bodyJson) + if err != nil { + return err + } + + GinkgoWriter.Printf("POST %s: %s\n", url, string(payload)) + + req, err := http.NewRequest("POST", url, bytes.NewBuffer(payload)) + if err != nil { + return err + } + + req.Header.Set("Content-Type", "application/json") + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return err + } + + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return err + } + + if resp.StatusCode < 200 || resp.StatusCode >= 400 { + return fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body)) + } + + return nil +} + +func postRequestResponseJSON[B1 any, B2 any](url string, reqJson *B1, respJson *B2) error { + payload, err := json.Marshal(reqJson) + if err != nil { + return err + } + + GinkgoWriter.Printf("POST %s: %s\n", url, string(payload)) + + req, err := http.NewRequest("POST", url, bytes.NewBuffer(payload)) + if err != nil { + return err + } + + req.Header.Set("Content-Type", "application/json") + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return err + } + + if resp.StatusCode < 200 || resp.StatusCode >= 400 { + return fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body)) + } + + return json.Unmarshal(body, respJson) +} + //go:embed backend-assets/* var backendAssets embed.FS @@ -836,6 +906,78 @@ var _ = Describe("API test", func() { Expect(tokens).ToNot(Or(Equal(1), Equal(0))) }) }) + + // See tests/integration/stores_test + Context("Stores", Label("stores"), func() { + + It("sets, gets, finds and deletes entries", func() { + ks := [][]float32{ + {0.1, 0.2, 0.3}, + {0.4, 0.5, 0.6}, + {0.7, 0.8, 0.9}, + } + vs := []string{ + "test1", + "test2", + "test3", + } + setBody := schema.StoresSet{ + Keys: ks, + Values: vs, + } + + url := "http://127.0.0.1:9090/stores/" + err := postRequestJSON(url+"set", &setBody) + Expect(err).ToNot(HaveOccurred()) + + getBody := schema.StoresGet{ + Keys: ks, + } + 
var getRespBody schema.StoresGetResponse + err = postRequestResponseJSON(url+"get", &getBody, &getRespBody) + Expect(err).ToNot(HaveOccurred()) + Expect(len(getRespBody.Keys)).To(Equal(len(ks))) + + for i, v := range getRespBody.Keys { + if v[0] == 0.1 { + Expect(getRespBody.Values[i]).To(Equal("test1")) + } else if v[0] == 0.4 { + Expect(getRespBody.Values[i]).To(Equal("test2")) + } else { + Expect(getRespBody.Values[i]).To(Equal("test3")) + } + } + + deleteBody := schema.StoresDelete{ + Keys: [][]float32{ + {0.1, 0.2, 0.3}, + }, + } + err = postRequestJSON(url+"delete", &deleteBody) + Expect(err).ToNot(HaveOccurred()) + + findBody := schema.StoresFind{ + Key: []float32{0.1, 0.3, 0.7}, + Topk: 10, + } + + var findRespBody schema.StoresFindResponse + err = postRequestResponseJSON(url+"find", &findBody, &findRespBody) + Expect(err).ToNot(HaveOccurred()) + Expect(len(findRespBody.Keys)).To(Equal(2)) + + for i, v := range findRespBody.Keys { + if v[0] == 0.4 { + Expect(findRespBody.Values[i]).To(Equal("test2")) + } else { + Expect(findRespBody.Values[i]).To(Equal("test3")) + } + + Expect(findRespBody.Similarities[i]).To(BeNumerically(">=", -1)) + Expect(findRespBody.Similarities[i]).To(BeNumerically("<=", 1)) + } + }) + }) }) Context("Config file", func() { diff --git a/core/http/endpoints/localai/stores.go b/core/http/endpoints/localai/stores.go new file mode 100644 index 00000000..c8abfdb1 --- /dev/null +++ b/core/http/endpoints/localai/stores.go @@ -0,0 +1,121 @@ +package localai + +import ( + "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/store" + "github.com/gofiber/fiber/v2" +) + +func StoresSetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + input := new(schema.StoresSet) + + if err := c.BodyParser(input); err != nil { + return err + } + + sb, err := backend.StoreBackend(sl, appConfig, input.Store) + if err != nil { + return err + } + + vals := make([][]byte, len(input.Values)) + for i, v := range input.Values { + vals[i] = []byte(v) + } + + err = store.SetCols(c.Context(), sb, input.Keys, vals) + if err != nil { + return err + } + + return c.Send(nil) + } +} + +func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + input := new(schema.StoresDelete) + + if err := c.BodyParser(input); err != nil { + return err + } + + sb, err := backend.StoreBackend(sl, appConfig, input.Store) + if err != nil { + return err + } + + if err := store.DeleteCols(c.Context(), sb, input.Keys); err != nil { + return err + } + + return c.Send(nil) + } +} + +func StoresGetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + input := new(schema.StoresGet) + + if err := c.BodyParser(input); err != nil { + return err + } + + sb, err := backend.StoreBackend(sl, appConfig, input.Store) + if err != nil { + return err + } + + keys, vals, err := store.GetCols(c.Context(), sb, input.Keys) + if err != nil { + return err + } + + res := schema.StoresGetResponse{ + Keys: keys, + Values: make([]string, len(vals)), + } + + for i, v := range vals { + res.Values[i] = string(v) + } + + return c.JSON(res) + } +} + +func StoresFindEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c 
*fiber.Ctx) error {
+    return func(c *fiber.Ctx) error {
+        input := new(schema.StoresFind)
+
+        if err := c.BodyParser(input); err != nil {
+            return err
+        }
+
+        sb, err := backend.StoreBackend(sl, appConfig, input.Store)
+        if err != nil {
+            return err
+        }
+
+        keys, vals, similarities, err := store.Find(c.Context(), sb, input.Key, input.Topk)
+        if err != nil {
+            return err
+        }
+
+        res := schema.StoresFindResponse{
+            Keys:         keys,
+            Values:       make([]string, len(vals)),
+            Similarities: similarities,
+        }
+
+        for i, v := range vals {
+            res.Values[i] = string(v)
+        }
+
+        return c.JSON(res)
+    }
+}
diff --git a/core/schema/localai.go b/core/schema/localai.go
index 5f5fd41e..e9b61cf3 100644
--- a/core/schema/localai.go
+++ b/core/schema/localai.go
@@ -20,3 +20,40 @@ type TTSRequest struct {
   Voice   string `json:"voice" yaml:"voice"`
   Backend string `json:"backend" yaml:"backend"`
 }
+
+type StoresSet struct {
+    Store string `json:"store,omitempty" yaml:"store,omitempty"`
+
+    Keys   [][]float32 `json:"keys" yaml:"keys"`
+    Values []string    `json:"values" yaml:"values"`
+}
+
+type StoresDelete struct {
+    Store string `json:"store,omitempty" yaml:"store,omitempty"`
+
+    Keys [][]float32 `json:"keys"`
+}
+
+type StoresGet struct {
+    Store string `json:"store,omitempty" yaml:"store,omitempty"`
+
+    Keys [][]float32 `json:"keys" yaml:"keys"`
+}
+
+type StoresGetResponse struct {
+    Keys   [][]float32 `json:"keys" yaml:"keys"`
+    Values []string    `json:"values" yaml:"values"`
+}
+
+type StoresFind struct {
+    Store string `json:"store,omitempty" yaml:"store,omitempty"`
+
+    Key  []float32 `json:"key" yaml:"key"`
+    Topk int       `json:"topk" yaml:"topk"`
+}
+
+type StoresFindResponse struct {
+    Keys         [][]float32 `json:"keys" yaml:"keys"`
+    Values       []string    `json:"values" yaml:"values"`
+    Similarities []float32   `json:"similarities" yaml:"similarities"`
+}
diff --git a/docs/content/docs/features/stores.md b/docs/content/docs/features/stores.md
new file mode 100644
index 00000000..18fc750c
--- /dev/null
+++ b/docs/content/docs/features/stores.md
@@ -0,0 +1,97 @@
+
++++
+disableToc = false
+title = "💾 Stores"
+
+weight = 18
+url = '/stores'
++++
+
+Stores are an experimental feature to help with querying data using similarity search. The API is
+deliberately low level and consists of only `get`, `set`, `delete` and `find`.
+
+For example, if you have an embedding of some text and want to find text with similar embeddings,
+you can create embeddings for chunks of all your text and then compare them against the embedding
+of the text you are searching for.
+
+An embedding here means a vector of numbers that represents some information about the text. Embeddings
+can be created by an A.I. model such as BERT, or by a more traditional method such as word frequency.
+
+Previously you would have had to integrate with an external vector database or library directly.
+With the stores feature you can now do it through the LocalAI API.
+
+Note, however, that doing a similarity search on embeddings is just one way to do retrieval. A higher-level
+API can take this into account, so this may not be the best place to start.
+
+## API overview
+
+There is an internal gRPC API and an external-facing HTTP JSON API. We'll only discuss the external
+HTTP API here; it mirrors the gRPC API. Consult `pkg/store/client` for internal usage.
+
+Everything is in columnar format, meaning that instead of getting an array of objects, each with a key
+and a value, you instead get two separate arrays of keys and values.
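+
+As an illustrative sketch (any HTTP client will do; the payload shapes are those documented below, while
+everything else here is an assumption for the example), a minimal Go client for the columnar format might
+look like:
+
+```go
+package main
+
+import (
+    "bytes"
+    "encoding/json"
+    "fmt"
+    "net/http"
+)
+
+func main() {
+    // Two keys with their two values, as parallel arrays.
+    set := map[string]any{
+        "keys":   [][]float32{{0.1, 0.2}, {0.3, 0.4}},
+        "values": []string{"foo", "bar"},
+    }
+    body, _ := json.Marshal(set)
+
+    resp, err := http.Post("http://localhost:8080/stores/set", "application/json", bytes.NewReader(body))
+    if err != nil {
+        panic(err)
+    }
+    defer resp.Body.Close()
+    fmt.Println(resp.Status) // expect "200 OK"
+}
+```
+
+The `store` field is omitted here, so the `default` store is used.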
+
+Keys are arrays of floating-point numbers with a maximum width of 32 bits. Values are strings (in gRPC they are bytes).
+
+The key vectors must all be the same length, and it's best for search performance if they are normalized. When
+adding keys, it is detected whether they are normalized, and what length they are.
+
+All endpoints accept a `store` field, which specifies which store to operate on. Presently stores are created
+on the fly, and there is only one store backend, so no configuration is required.
+
+## Set
+
+To set some keys you can do:
+
+```
+curl -X POST http://localhost:8080/stores/set \
+     -H "Content-Type: application/json" \
+     -d '{"keys": [[0.1, 0.2], [0.3, 0.4]], "values": ["foo", "bar"]}'
+```
+
+Setting the same keys again will update their values.
+
+On success 200 OK is returned with no body.
+
+## Get
+
+To get some keys you can do:
+
+```
+curl -X POST http://localhost:8080/stores/get \
+     -H "Content-Type: application/json" \
+     -d '{"keys": [[0.1, 0.2]]}'
+```
+
+Both the keys and values are returned, e.g.: `{"keys":[[0.1,0.2]],"values":["foo"]}`
+
+The order of the keys is not preserved! If a key does not exist then nothing is returned.
+
+## Delete
+
+To delete keys and values you can do:
+
+```
+curl -X POST http://localhost:8080/stores/delete \
+     -H "Content-Type: application/json" \
+     -d '{"keys": [[0.1, 0.2]]}'
+```
+
+If a key doesn't exist then it is ignored.
+
+On success 200 OK is returned with no body.
+
+## Find
+
+To do a similarity search you can do:
+
+```
+curl -X POST http://localhost:8080/stores/find \
+     -H "Content-Type: application/json" \
+     -d '{"topk": 2, "key": [0.2, 0.1]}'
+```
+
+`topk` limits the number of results returned. The result format is the same as for `get`, except that it
+also includes an array of `similarities`, where `1.0` is the maximum similarity. Results are returned in
+order from most similar to least.
diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md
index 3ac21e94..f78a9be0 100644
--- a/docs/content/docs/overview.md
+++ b/docs/content/docs/overview.md
@@ -73,6 +73,7 @@ Note that this started just as a fun weekend project by [mudler](https://github.
 - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
 - 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
 - 🆕 [Vision API](https://localai.io/features/gpt-vision/)
+- 💾 [Stores](https://localai.io/features/stores)
 
 ## Contribute and help
diff --git a/examples/semantic-todo/README.md b/examples/semantic-todo/README.md
new file mode 100644
index 00000000..ec9e19b9
--- /dev/null
+++ b/examples/semantic-todo/README.md
@@ -0,0 +1,15 @@
+This demonstrates the vector store backend in its simplest form.
+You can add tasks and then search/sort them using the TUI.
+
+To build and run, do:
+
+```bash
+$ go get .
+$ go run .
+```
+
+A separate LocalAI instance is of course required, e.g.:
+ +```bash +$ docker run -e DEBUG=true --rm -it -p 8080:8080 bert-cpp +``` diff --git a/examples/semantic-todo/go.mod b/examples/semantic-todo/go.mod new file mode 100644 index 00000000..7869e329 --- /dev/null +++ b/examples/semantic-todo/go.mod @@ -0,0 +1,18 @@ +module semantic-todo + +go 1.21.6 + +require ( + github.com/gdamore/tcell/v2 v2.7.1 + github.com/rivo/tview v0.0.0-20240307173318-e804876934a1 +) + +require ( + github.com/gdamore/encoding v1.0.0 // indirect + github.com/lucasb-eyer/go-colorful v1.2.0 // indirect + github.com/mattn/go-runewidth v0.0.15 // indirect + github.com/rivo/uniseg v0.4.7 // indirect + golang.org/x/sys v0.17.0 // indirect + golang.org/x/term v0.17.0 // indirect + golang.org/x/text v0.14.0 // indirect +) diff --git a/examples/semantic-todo/go.sum b/examples/semantic-todo/go.sum new file mode 100644 index 00000000..320d4060 --- /dev/null +++ b/examples/semantic-todo/go.sum @@ -0,0 +1,50 @@ +github.com/gdamore/encoding v1.0.0 h1:+7OoQ1Bc6eTm5niUzBa0Ctsh6JbMW6Ra+YNuAtDBdko= +github.com/gdamore/encoding v1.0.0/go.mod h1:alR0ol34c49FCSBLjhosxzcPHQbf2trDkoo5dl+VrEg= +github.com/gdamore/tcell/v2 v2.7.1 h1:TiCcmpWHiAU7F0rA2I3S2Y4mmLmO9KHxJ7E1QhYzQbc= +github.com/gdamore/tcell/v2 v2.7.1/go.mod h1:dSXtXTSK0VsW1biw65DZLZ2NKr7j0qP/0J7ONmsraWg= +github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= +github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= +github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= +github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/rivo/tview v0.0.0-20240307173318-e804876934a1 h1:bWLHTRekAy497pE7+nXSuzXwwFHI0XauRzz6roUvY+s= +github.com/rivo/tview v0.0.0-20240307173318-e804876934a1/go.mod h1:02iFIz7K/A9jGCvrizLPvoqr4cEIx7q54RH5Qudkrss= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rivo/uniseg v0.4.3/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= +github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys 
v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.17.0 h1:mkTF7LCd6WGJNL3K1Ad7kwxNfYAW6a8a8QqtMblp/4U= +golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/examples/semantic-todo/main.go b/examples/semantic-todo/main.go new file mode 100644 index 00000000..371fe6b9 --- /dev/null +++ b/examples/semantic-todo/main.go @@ -0,0 +1,352 @@ +package main + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + + "github.com/gdamore/tcell/v2" + "github.com/rivo/tview" +) + +const ( + localAI string = "http://localhost:8080" + rootStatus string = "[::b][::-]: Add Task [::b]/[::-]: Search Task [::b][::-]: Exit" + inputStatus string = "Press [::b][::-] to submit the task, [::b][::-] to cancel" +) + +type Task struct { + Description string + Similarity float32 +} + +type AppState int + +const ( + StateRoot AppState = iota + StateInput + StateSearch +) + +type App struct { + state AppState + tasks []Task + app *tview.Application + flex *tview.Flex + table *tview.Table +} + +func NewApp() *App { + return &App{ + state: StateRoot, + tasks: []Task{ + {Description: "Take the dog for a walk (after I get a dog)"}, + {Description: "Go to the toilet"}, + {Description: "Allow TODOs to be marked completed or removed"}, + }, + } +} + +func getEmbeddings(description string) ([]float32, error) { + // Define the request payload + payload := map[string]interface{}{ + "model": "bert-cpp-minilm-v6", + "input": description, + } + + // Marshal the payload into JSON + jsonPayload, err := json.Marshal(payload) + if err != nil { + return nil, err + } + + // Make the HTTP request to the local OpenAI 
embeddings API + resp, err := http.Post(localAI+"/embeddings", "application/json", bytes.NewBuffer(jsonPayload)) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + // Check if the request was successful + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("request to embeddings API failed with status code: %d", resp.StatusCode) + } + + // Parse the response body + var result struct { + Data []struct { + Embedding []float32 `json:"embedding"` + } `json:"data"` + } + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + return nil, err + } + + // Return the embedding + if len(result.Data) > 0 { + return result.Data[0].Embedding, nil + } + return nil, errors.New("no embedding received from API") +} + +type StoresSet struct { + Store string `json:"store,omitempty" yaml:"store,omitempty"` + + Keys [][]float32 `json:"keys" yaml:"keys"` + Values []string `json:"values" yaml:"values"` +} + +func postTasksToExternalService(tasks []Task) error { + keys := make([][]float32, 0, len(tasks)) + // Get the embeddings for the task description + for _, task := range tasks { + embedding, err := getEmbeddings(task.Description) + if err != nil { + return err + } + keys = append(keys, embedding) + } + + values := make([]string, 0, len(tasks)) + for _, task := range tasks { + values = append(values, task.Description) + } + + // Construct the StoresSet object + storesSet := StoresSet{ + Store: "tasks_store", // Assuming you have a specific store name + Keys: keys, + Values: values, + } + + // Marshal the StoresSet object into JSON + jsonData, err := json.Marshal(storesSet) + if err != nil { + return err + } + + // Make the HTTP POST request to the external service + resp, err := http.Post(localAI+"/stores/set", "application/json", bytes.NewBuffer(jsonData)) + if err != nil { + return err + } + defer resp.Body.Close() + + // Check if the request was successful + if resp.StatusCode != http.StatusOK { + // read resp body into string + body, err := io.ReadAll(resp.Body) + if err != nil { + return err + } + return fmt.Errorf("store request failed with status code: %d: %s", resp.StatusCode, body) + } + + return nil +} + +type StoresFind struct { + Store string `json:"store,omitempty" yaml:"store,omitempty"` + + Key []float32 `json:"key" yaml:"key"` + Topk int `json:"topk" yaml:"topk"` +} + +type StoresFindResponse struct { + Keys [][]float32 `json:"keys" yaml:"keys"` + Values []string `json:"values" yaml:"values"` + Similarities []float32 `json:"similarities" yaml:"similarities"` +} + +func findSimilarTexts(inputText string, topk int) (StoresFindResponse, error) { + // Initialize an empty response object + response := StoresFindResponse{} + + // Get the embedding for the input text + embedding, err := getEmbeddings(inputText) + if err != nil { + return response, err + } + + // Construct the StoresFind object + storesFind := StoresFind{ + Store: "tasks_store", // Assuming you have a specific store name + Key: embedding, + Topk: topk, + } + + // Marshal the StoresFind object into JSON + jsonData, err := json.Marshal(storesFind) + if err != nil { + return response, err + } + + // Make the HTTP POST request to the external service's /stores/find endpoint + resp, err := http.Post(localAI+"/stores/find", "application/json", bytes.NewBuffer(jsonData)) + if err != nil { + return response, err + } + defer resp.Body.Close() + + // Check if the request was successful + if resp.StatusCode != http.StatusOK { + return response, fmt.Errorf("request to /stores/find failed with status code: 
%d", resp.StatusCode) + } + + // Parse the response body to retrieve similar texts and similarities + if err := json.NewDecoder(resp.Body).Decode(&response); err != nil { + return response, err + } + + return response, nil +} + +func (app *App) updateUI() { + // Clear the flex layout + app.flex.Clear() + app.flex.SetDirection(tview.FlexColumn) + app.flex.AddItem(nil, 0, 1, false) + + midCol := tview.NewFlex() + midCol.SetDirection(tview.FlexRow) + midCol.AddItem(nil, 0, 1, false) + + // Create a new table. + app.table.Clear() + app.table.SetBorders(true) + + // Set table headers + app.table.SetCell(0, 0, tview.NewTableCell("Description").SetAlign(tview.AlignLeft).SetExpansion(1).SetAttributes(tcell.AttrBold)) + app.table.SetCell(0, 1, tview.NewTableCell("Similarity").SetAlign(tview.AlignCenter).SetExpansion(0).SetAttributes(tcell.AttrBold)) + + // Add the tasks to the table. + for i, task := range app.tasks { + row := i + 1 + app.table.SetCell(row, 0, tview.NewTableCell(task.Description)) + app.table.SetCell(row, 1, tview.NewTableCell(fmt.Sprintf("%.2f", task.Similarity))) + } + + if app.state == StateInput { + inputField := tview.NewInputField() + inputField. + SetLabel("New Task: "). + SetFieldWidth(0). + SetDoneFunc(func(key tcell.Key) { + if key == tcell.KeyEnter { + task := Task{Description: inputField.GetText()} + app.tasks = append(app.tasks, task) + app.state = StateRoot + postTasksToExternalService([]Task{task}) + } + app.updateUI() + }) + midCol.AddItem(inputField, 3, 2, true) + app.app.SetFocus(inputField) + } else if app.state == StateSearch { + searchField := tview.NewInputField() + searchField.SetLabel("Search: "). + SetFieldWidth(0). + SetDoneFunc(func(key tcell.Key) { + if key == tcell.KeyEnter { + similar, err := findSimilarTexts(searchField.GetText(), 100) + if err != nil { + panic(err) + } + app.tasks = make([]Task, len(similar.Keys)) + for i, v := range similar.Values { + app.tasks[i] = Task{Description: v, Similarity: similar.Similarities[i]} + } + } + app.updateUI() + }) + midCol.AddItem(searchField, 3, 2, true) + app.app.SetFocus(searchField) + } else { + midCol.AddItem(nil, 3, 1, false) + } + + midCol.AddItem(app.table, 0, 2, true) + + // Add the status bar to the flex layout + statusBar := tview.NewTextView(). + SetText(rootStatus). + SetDynamicColors(true). 
+ SetTextAlign(tview.AlignCenter) + if app.state == StateInput { + statusBar.SetText(inputStatus) + } + midCol.AddItem(statusBar, 1, 1, false) + midCol.AddItem(nil, 0, 1, false) + + app.flex.AddItem(midCol, 0, 10, true) + app.flex.AddItem(nil, 0, 1, false) + + // Set the flex as the root element + app.app.SetRoot(app.flex, true) +} + +func main() { + app := NewApp() + tApp := tview.NewApplication() + flex := tview.NewFlex().SetDirection(tview.FlexRow) + table := tview.NewTable() + + app.app = tApp + app.flex = flex + app.table = table + + app.updateUI() // Initial UI setup + + app.app.SetInputCapture(func(event *tcell.EventKey) *tcell.EventKey { + switch app.state { + case StateRoot: + // Handle key events when in the root state + switch event.Key() { + case tcell.KeyRune: + switch event.Rune() { + case ' ': + app.state = StateInput + app.updateUI() + return nil // Event is handled + case '/': + app.state = StateSearch + app.updateUI() + return nil // Event is handled + } + } + + case StateInput: + // Handle key events when in the input state + if event.Key() == tcell.KeyEsc { + // Exit input state without adding a task + app.state = StateRoot + app.updateUI() + return nil // Event is handled + } + + case StateSearch: + // Handle key events when in the search state + if event.Key() == tcell.KeyEsc { + // Exit search state + app.state = StateRoot + app.updateUI() + return nil // Event is handled + } + } + + // Return the event for further processing by tview + return event + }) + + if err := postTasksToExternalService(app.tasks); err != nil { + panic(err) + } + + // Start the application + if err := app.app.Run(); err != nil { + panic(err) + } +} diff --git a/pkg/grpc/backend.go b/pkg/grpc/backend.go index 22933d58..8fb8c39d 100644 --- a/pkg/grpc/backend.go +++ b/pkg/grpc/backend.go @@ -44,4 +44,9 @@ type Backend interface { AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error) Status(ctx context.Context) (*pb.StatusResponse, error) + + StoresSet(ctx context.Context, in *pb.StoresSetOptions, opts ...grpc.CallOption) (*pb.Result, error) + StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, opts ...grpc.CallOption) (*pb.Result, error) + StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error) + StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error) } diff --git a/pkg/grpc/base/base.go b/pkg/grpc/base/base.go index 89c8785e..0af5d94f 100644 --- a/pkg/grpc/base/base.go +++ b/pkg/grpc/base/base.go @@ -72,6 +72,22 @@ func (llm *Base) Status() (pb.StatusResponse, error) { }, nil } +func (llm *Base) StoresSet(*pb.StoresSetOptions) error { + return fmt.Errorf("unimplemented") +} + +func (llm *Base) StoresGet(*pb.StoresGetOptions) (pb.StoresGetResult, error) { + return pb.StoresGetResult{}, fmt.Errorf("unimplemented") +} + +func (llm *Base) StoresDelete(*pb.StoresDeleteOptions) error { + return fmt.Errorf("unimplemented") +} + +func (llm *Base) StoresFind(*pb.StoresFindOptions) (pb.StoresFindResult, error) { + return pb.StoresFindResult{}, fmt.Errorf("unimplemented") +} + func memoryUsage() *pb.MemoryUsageData { mud := pb.MemoryUsageData{ Breakdown: make(map[string]uint64), diff --git a/pkg/grpc/client.go b/pkg/grpc/client.go index 9058db05..882db12a 100644 --- a/pkg/grpc/client.go +++ b/pkg/grpc/client.go 
@@ -291,3 +291,67 @@ func (c *Client) Status(ctx context.Context) (*pb.StatusResponse, error) { client := pb.NewBackendClient(conn) return client.Status(ctx, &pb.HealthMessage{}) } + +func (c *Client) StoresSet(ctx context.Context, in *pb.StoresSetOptions, opts ...grpc.CallOption) (*pb.Result, error) { + if !c.parallel { + c.opMutex.Lock() + defer c.opMutex.Unlock() + } + c.setBusy(true) + defer c.setBusy(false) + conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + return nil, err + } + defer conn.Close() + client := pb.NewBackendClient(conn) + return client.StoresSet(ctx, in, opts...) +} + +func (c *Client) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, opts ...grpc.CallOption) (*pb.Result, error) { + if !c.parallel { + c.opMutex.Lock() + defer c.opMutex.Unlock() + } + c.setBusy(true) + defer c.setBusy(false) + conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + return nil, err + } + defer conn.Close() + client := pb.NewBackendClient(conn) + return client.StoresDelete(ctx, in, opts...) +} + +func (c *Client) StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error) { + if !c.parallel { + c.opMutex.Lock() + defer c.opMutex.Unlock() + } + c.setBusy(true) + defer c.setBusy(false) + conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + return nil, err + } + defer conn.Close() + client := pb.NewBackendClient(conn) + return client.StoresGet(ctx, in, opts...) +} + +func (c *Client) StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error) { + if !c.parallel { + c.opMutex.Lock() + defer c.opMutex.Unlock() + } + c.setBusy(true) + defer c.setBusy(false) + conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + return nil, err + } + defer conn.Close() + client := pb.NewBackendClient(conn) + return client.StoresFind(ctx, in, opts...) 
+} diff --git a/pkg/grpc/embed.go b/pkg/grpc/embed.go index 228b1df5..73b185a3 100644 --- a/pkg/grpc/embed.go +++ b/pkg/grpc/embed.go @@ -85,6 +85,22 @@ func (e *embedBackend) Status(ctx context.Context) (*pb.StatusResponse, error) { return e.s.Status(ctx, &pb.HealthMessage{}) } +func (e *embedBackend) StoresSet(ctx context.Context, in *pb.StoresSetOptions, opts ...grpc.CallOption) (*pb.Result, error) { + return e.s.StoresSet(ctx, in) +} + +func (e *embedBackend) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, opts ...grpc.CallOption) (*pb.Result, error) { + return e.s.StoresDelete(ctx, in) +} + +func (e *embedBackend) StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error) { + return e.s.StoresGet(ctx, in) +} + +func (e *embedBackend) StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error) { + return e.s.StoresFind(ctx, in) +} + type embedBackendServerStream struct { ctx context.Context fn func(s []byte) diff --git a/pkg/grpc/interface.go b/pkg/grpc/interface.go index 1cc7cb3d..4d06544d 100644 --- a/pkg/grpc/interface.go +++ b/pkg/grpc/interface.go @@ -19,6 +19,11 @@ type LLM interface { TTS(*pb.TTSRequest) error TokenizeString(*pb.PredictOptions) (pb.TokenizationResponse, error) Status() (pb.StatusResponse, error) + + StoresSet(*pb.StoresSetOptions) error + StoresDelete(*pb.StoresDeleteOptions) error + StoresGet(*pb.StoresGetOptions) (pb.StoresGetResult, error) + StoresFind(*pb.StoresFindOptions) (pb.StoresFindResult, error) } func newReply(s string) *pb.Reply { diff --git a/pkg/grpc/proto/backend.pb.go b/pkg/grpc/proto/backend.pb.go index 48551499..cc687577 100644 --- a/pkg/grpc/proto/backend.pb.go +++ b/pkg/grpc/proto/backend.pb.go @@ -1,6 +1,6 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.26.0 +// protoc-gen-go v1.31.0 // protoc v4.23.4 // source: backend.proto @@ -69,7 +69,423 @@ func (x StatusResponse_State) Number() protoreflect.EnumNumber { // Deprecated: Use StatusResponse_State.Descriptor instead. func (StatusResponse_State) EnumDescriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{13, 0} + return file_backend_proto_rawDescGZIP(), []int{21, 0} +} + +type StoresKey struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Floats []float32 `protobuf:"fixed32,1,rep,packed,name=Floats,proto3" json:"Floats,omitempty"` +} + +func (x *StoresKey) Reset() { + *x = StoresKey{} + if protoimpl.UnsafeEnabled { + mi := &file_backend_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StoresKey) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StoresKey) ProtoMessage() {} + +func (x *StoresKey) ProtoReflect() protoreflect.Message { + mi := &file_backend_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StoresKey.ProtoReflect.Descriptor instead. 
+func (*StoresKey) Descriptor() ([]byte, []int) { + return file_backend_proto_rawDescGZIP(), []int{0} +} + +func (x *StoresKey) GetFloats() []float32 { + if x != nil { + return x.Floats + } + return nil +} + +type StoresValue struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Bytes []byte `protobuf:"bytes,1,opt,name=Bytes,proto3" json:"Bytes,omitempty"` +} + +func (x *StoresValue) Reset() { + *x = StoresValue{} + if protoimpl.UnsafeEnabled { + mi := &file_backend_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StoresValue) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StoresValue) ProtoMessage() {} + +func (x *StoresValue) ProtoReflect() protoreflect.Message { + mi := &file_backend_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StoresValue.ProtoReflect.Descriptor instead. +func (*StoresValue) Descriptor() ([]byte, []int) { + return file_backend_proto_rawDescGZIP(), []int{1} +} + +func (x *StoresValue) GetBytes() []byte { + if x != nil { + return x.Bytes + } + return nil +} + +type StoresSetOptions struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"` + Values []*StoresValue `protobuf:"bytes,2,rep,name=Values,proto3" json:"Values,omitempty"` +} + +func (x *StoresSetOptions) Reset() { + *x = StoresSetOptions{} + if protoimpl.UnsafeEnabled { + mi := &file_backend_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StoresSetOptions) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StoresSetOptions) ProtoMessage() {} + +func (x *StoresSetOptions) ProtoReflect() protoreflect.Message { + mi := &file_backend_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StoresSetOptions.ProtoReflect.Descriptor instead. 
+func (*StoresSetOptions) Descriptor() ([]byte, []int) { + return file_backend_proto_rawDescGZIP(), []int{2} +} + +func (x *StoresSetOptions) GetKeys() []*StoresKey { + if x != nil { + return x.Keys + } + return nil +} + +func (x *StoresSetOptions) GetValues() []*StoresValue { + if x != nil { + return x.Values + } + return nil +} + +type StoresDeleteOptions struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"` +} + +func (x *StoresDeleteOptions) Reset() { + *x = StoresDeleteOptions{} + if protoimpl.UnsafeEnabled { + mi := &file_backend_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StoresDeleteOptions) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StoresDeleteOptions) ProtoMessage() {} + +func (x *StoresDeleteOptions) ProtoReflect() protoreflect.Message { + mi := &file_backend_proto_msgTypes[3] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StoresDeleteOptions.ProtoReflect.Descriptor instead. +func (*StoresDeleteOptions) Descriptor() ([]byte, []int) { + return file_backend_proto_rawDescGZIP(), []int{3} +} + +func (x *StoresDeleteOptions) GetKeys() []*StoresKey { + if x != nil { + return x.Keys + } + return nil +} + +type StoresGetOptions struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"` +} + +func (x *StoresGetOptions) Reset() { + *x = StoresGetOptions{} + if protoimpl.UnsafeEnabled { + mi := &file_backend_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StoresGetOptions) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StoresGetOptions) ProtoMessage() {} + +func (x *StoresGetOptions) ProtoReflect() protoreflect.Message { + mi := &file_backend_proto_msgTypes[4] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StoresGetOptions.ProtoReflect.Descriptor instead. 
+func (*StoresGetOptions) Descriptor() ([]byte, []int) { + return file_backend_proto_rawDescGZIP(), []int{4} +} + +func (x *StoresGetOptions) GetKeys() []*StoresKey { + if x != nil { + return x.Keys + } + return nil +} + +type StoresGetResult struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"` + Values []*StoresValue `protobuf:"bytes,2,rep,name=Values,proto3" json:"Values,omitempty"` +} + +func (x *StoresGetResult) Reset() { + *x = StoresGetResult{} + if protoimpl.UnsafeEnabled { + mi := &file_backend_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StoresGetResult) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StoresGetResult) ProtoMessage() {} + +func (x *StoresGetResult) ProtoReflect() protoreflect.Message { + mi := &file_backend_proto_msgTypes[5] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StoresGetResult.ProtoReflect.Descriptor instead. +func (*StoresGetResult) Descriptor() ([]byte, []int) { + return file_backend_proto_rawDescGZIP(), []int{5} +} + +func (x *StoresGetResult) GetKeys() []*StoresKey { + if x != nil { + return x.Keys + } + return nil +} + +func (x *StoresGetResult) GetValues() []*StoresValue { + if x != nil { + return x.Values + } + return nil +} + +type StoresFindOptions struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Key *StoresKey `protobuf:"bytes,1,opt,name=Key,proto3" json:"Key,omitempty"` + TopK int32 `protobuf:"varint,2,opt,name=TopK,proto3" json:"TopK,omitempty"` +} + +func (x *StoresFindOptions) Reset() { + *x = StoresFindOptions{} + if protoimpl.UnsafeEnabled { + mi := &file_backend_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StoresFindOptions) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StoresFindOptions) ProtoMessage() {} + +func (x *StoresFindOptions) ProtoReflect() protoreflect.Message { + mi := &file_backend_proto_msgTypes[6] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StoresFindOptions.ProtoReflect.Descriptor instead. 
+func (*StoresFindOptions) Descriptor() ([]byte, []int) { + return file_backend_proto_rawDescGZIP(), []int{6} +} + +func (x *StoresFindOptions) GetKey() *StoresKey { + if x != nil { + return x.Key + } + return nil +} + +func (x *StoresFindOptions) GetTopK() int32 { + if x != nil { + return x.TopK + } + return 0 +} + +type StoresFindResult struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"` + Values []*StoresValue `protobuf:"bytes,2,rep,name=Values,proto3" json:"Values,omitempty"` + Similarities []float32 `protobuf:"fixed32,3,rep,packed,name=Similarities,proto3" json:"Similarities,omitempty"` +} + +func (x *StoresFindResult) Reset() { + *x = StoresFindResult{} + if protoimpl.UnsafeEnabled { + mi := &file_backend_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StoresFindResult) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StoresFindResult) ProtoMessage() {} + +func (x *StoresFindResult) ProtoReflect() protoreflect.Message { + mi := &file_backend_proto_msgTypes[7] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StoresFindResult.ProtoReflect.Descriptor instead. +func (*StoresFindResult) Descriptor() ([]byte, []int) { + return file_backend_proto_rawDescGZIP(), []int{7} +} + +func (x *StoresFindResult) GetKeys() []*StoresKey { + if x != nil { + return x.Keys + } + return nil +} + +func (x *StoresFindResult) GetValues() []*StoresValue { + if x != nil { + return x.Values + } + return nil +} + +func (x *StoresFindResult) GetSimilarities() []float32 { + if x != nil { + return x.Similarities + } + return nil } type HealthMessage struct { @@ -81,7 +497,7 @@ type HealthMessage struct { func (x *HealthMessage) Reset() { *x = HealthMessage{} if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[0] + mi := &file_backend_proto_msgTypes[8] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -94,7 +510,7 @@ func (x *HealthMessage) String() string { func (*HealthMessage) ProtoMessage() {} func (x *HealthMessage) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[0] + mi := &file_backend_proto_msgTypes[8] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -107,7 +523,7 @@ func (x *HealthMessage) ProtoReflect() protoreflect.Message { // Deprecated: Use HealthMessage.ProtoReflect.Descriptor instead. func (*HealthMessage) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{0} + return file_backend_proto_rawDescGZIP(), []int{8} } // The request message containing the user's name. 
@@ -162,7 +578,7 @@ type PredictOptions struct { func (x *PredictOptions) Reset() { *x = PredictOptions{} if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[1] + mi := &file_backend_proto_msgTypes[9] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -175,7 +591,7 @@ func (x *PredictOptions) String() string { func (*PredictOptions) ProtoMessage() {} func (x *PredictOptions) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[1] + mi := &file_backend_proto_msgTypes[9] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -188,7 +604,7 @@ func (x *PredictOptions) ProtoReflect() protoreflect.Message { // Deprecated: Use PredictOptions.ProtoReflect.Descriptor instead. func (*PredictOptions) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{1} + return file_backend_proto_rawDescGZIP(), []int{9} } func (x *PredictOptions) GetPrompt() string { @@ -490,7 +906,7 @@ type Reply struct { func (x *Reply) Reset() { *x = Reply{} if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[2] + mi := &file_backend_proto_msgTypes[10] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -503,7 +919,7 @@ func (x *Reply) String() string { func (*Reply) ProtoMessage() {} func (x *Reply) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[2] + mi := &file_backend_proto_msgTypes[10] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -516,7 +932,7 @@ func (x *Reply) ProtoReflect() protoreflect.Message { // Deprecated: Use Reply.ProtoReflect.Descriptor instead. func (*Reply) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{2} + return file_backend_proto_rawDescGZIP(), []int{10} } func (x *Reply) GetMessage() []byte { @@ -594,7 +1010,7 @@ type ModelOptions struct { func (x *ModelOptions) Reset() { *x = ModelOptions{} if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[3] + mi := &file_backend_proto_msgTypes[11] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -607,7 +1023,7 @@ func (x *ModelOptions) String() string { func (*ModelOptions) ProtoMessage() {} func (x *ModelOptions) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[3] + mi := &file_backend_proto_msgTypes[11] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -620,7 +1036,7 @@ func (x *ModelOptions) ProtoReflect() protoreflect.Message { // Deprecated: Use ModelOptions.ProtoReflect.Descriptor instead. 
func (*ModelOptions) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{3} + return file_backend_proto_rawDescGZIP(), []int{11} } func (x *ModelOptions) GetModel() string { @@ -1013,7 +1429,7 @@ type Result struct { func (x *Result) Reset() { *x = Result{} if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[4] + mi := &file_backend_proto_msgTypes[12] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1026,7 +1442,7 @@ func (x *Result) String() string { func (*Result) ProtoMessage() {} func (x *Result) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[4] + mi := &file_backend_proto_msgTypes[12] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1039,7 +1455,7 @@ func (x *Result) ProtoReflect() protoreflect.Message { // Deprecated: Use Result.ProtoReflect.Descriptor instead. func (*Result) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{4} + return file_backend_proto_rawDescGZIP(), []int{12} } func (x *Result) GetMessage() string { @@ -1067,7 +1483,7 @@ type EmbeddingResult struct { func (x *EmbeddingResult) Reset() { *x = EmbeddingResult{} if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[5] + mi := &file_backend_proto_msgTypes[13] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1080,7 +1496,7 @@ func (x *EmbeddingResult) String() string { func (*EmbeddingResult) ProtoMessage() {} func (x *EmbeddingResult) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[5] + mi := &file_backend_proto_msgTypes[13] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1093,7 +1509,7 @@ func (x *EmbeddingResult) ProtoReflect() protoreflect.Message { // Deprecated: Use EmbeddingResult.ProtoReflect.Descriptor instead. func (*EmbeddingResult) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{5} + return file_backend_proto_rawDescGZIP(), []int{13} } func (x *EmbeddingResult) GetEmbeddings() []float32 { @@ -1116,7 +1532,7 @@ type TranscriptRequest struct { func (x *TranscriptRequest) Reset() { *x = TranscriptRequest{} if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[6] + mi := &file_backend_proto_msgTypes[14] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1129,7 +1545,7 @@ func (x *TranscriptRequest) String() string { func (*TranscriptRequest) ProtoMessage() {} func (x *TranscriptRequest) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[6] + mi := &file_backend_proto_msgTypes[14] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1142,7 +1558,7 @@ func (x *TranscriptRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use TranscriptRequest.ProtoReflect.Descriptor instead. 
func (*TranscriptRequest) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{6} + return file_backend_proto_rawDescGZIP(), []int{14} } func (x *TranscriptRequest) GetDst() string { @@ -1178,7 +1594,7 @@ type TranscriptResult struct { func (x *TranscriptResult) Reset() { *x = TranscriptResult{} if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[7] + mi := &file_backend_proto_msgTypes[15] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1191,7 +1607,7 @@ func (x *TranscriptResult) String() string { func (*TranscriptResult) ProtoMessage() {} func (x *TranscriptResult) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[7] + mi := &file_backend_proto_msgTypes[15] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1204,7 +1620,7 @@ func (x *TranscriptResult) ProtoReflect() protoreflect.Message { // Deprecated: Use TranscriptResult.ProtoReflect.Descriptor instead. func (*TranscriptResult) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{7} + return file_backend_proto_rawDescGZIP(), []int{15} } func (x *TranscriptResult) GetSegments() []*TranscriptSegment { @@ -1236,7 +1652,7 @@ type TranscriptSegment struct { func (x *TranscriptSegment) Reset() { *x = TranscriptSegment{} if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[8] + mi := &file_backend_proto_msgTypes[16] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1249,7 +1665,7 @@ func (x *TranscriptSegment) String() string { func (*TranscriptSegment) ProtoMessage() {} func (x *TranscriptSegment) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[8] + mi := &file_backend_proto_msgTypes[16] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1262,7 +1678,7 @@ func (x *TranscriptSegment) ProtoReflect() protoreflect.Message { // Deprecated: Use TranscriptSegment.ProtoReflect.Descriptor instead. func (*TranscriptSegment) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{8} + return file_backend_proto_rawDescGZIP(), []int{16} } func (x *TranscriptSegment) GetId() int32 { @@ -1322,7 +1738,7 @@ type GenerateImageRequest struct { func (x *GenerateImageRequest) Reset() { *x = GenerateImageRequest{} if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[9] + mi := &file_backend_proto_msgTypes[17] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1335,7 +1751,7 @@ func (x *GenerateImageRequest) String() string { func (*GenerateImageRequest) ProtoMessage() {} func (x *GenerateImageRequest) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[9] + mi := &file_backend_proto_msgTypes[17] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1348,7 +1764,7 @@ func (x *GenerateImageRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use GenerateImageRequest.ProtoReflect.Descriptor instead. 
func (*GenerateImageRequest) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{9} + return file_backend_proto_rawDescGZIP(), []int{17} } func (x *GenerateImageRequest) GetHeight() int32 { @@ -1442,7 +1858,7 @@ type TTSRequest struct { func (x *TTSRequest) Reset() { *x = TTSRequest{} if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[10] + mi := &file_backend_proto_msgTypes[18] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1455,7 +1871,7 @@ func (x *TTSRequest) String() string { func (*TTSRequest) ProtoMessage() {} func (x *TTSRequest) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[10] + mi := &file_backend_proto_msgTypes[18] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1468,7 +1884,7 @@ func (x *TTSRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use TTSRequest.ProtoReflect.Descriptor instead. func (*TTSRequest) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{10} + return file_backend_proto_rawDescGZIP(), []int{18} } func (x *TTSRequest) GetText() string { @@ -1511,7 +1927,7 @@ type TokenizationResponse struct { func (x *TokenizationResponse) Reset() { *x = TokenizationResponse{} if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[11] + mi := &file_backend_proto_msgTypes[19] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1524,7 +1940,7 @@ func (x *TokenizationResponse) String() string { func (*TokenizationResponse) ProtoMessage() {} func (x *TokenizationResponse) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[11] + mi := &file_backend_proto_msgTypes[19] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1537,7 +1953,7 @@ func (x *TokenizationResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use TokenizationResponse.ProtoReflect.Descriptor instead. func (*TokenizationResponse) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{11} + return file_backend_proto_rawDescGZIP(), []int{19} } func (x *TokenizationResponse) GetLength() int32 { @@ -1566,7 +1982,7 @@ type MemoryUsageData struct { func (x *MemoryUsageData) Reset() { *x = MemoryUsageData{} if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[12] + mi := &file_backend_proto_msgTypes[20] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1579,7 +1995,7 @@ func (x *MemoryUsageData) String() string { func (*MemoryUsageData) ProtoMessage() {} func (x *MemoryUsageData) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[12] + mi := &file_backend_proto_msgTypes[20] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1592,7 +2008,7 @@ func (x *MemoryUsageData) ProtoReflect() protoreflect.Message { // Deprecated: Use MemoryUsageData.ProtoReflect.Descriptor instead. 
func (*MemoryUsageData) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{12} + return file_backend_proto_rawDescGZIP(), []int{20} } func (x *MemoryUsageData) GetTotal() uint64 { @@ -1621,7 +2037,7 @@ type StatusResponse struct { func (x *StatusResponse) Reset() { *x = StatusResponse{} if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[13] + mi := &file_backend_proto_msgTypes[21] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1634,7 +2050,7 @@ func (x *StatusResponse) String() string { func (*StatusResponse) ProtoMessage() {} func (x *StatusResponse) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[13] + mi := &file_backend_proto_msgTypes[21] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1647,7 +2063,7 @@ func (x *StatusResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use StatusResponse.ProtoReflect.Descriptor instead. func (*StatusResponse) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{13} + return file_backend_proto_rawDescGZIP(), []int{21} } func (x *StatusResponse) GetState() StatusResponse_State { @@ -1668,322 +2084,377 @@ var File_backend_proto protoreflect.FileDescriptor var file_backend_proto_rawDesc = []byte{ 0x0a, 0x0d, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, - 0x07, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x22, 0x0f, 0x0a, 0x0d, 0x48, 0x65, 0x61, 0x6c, - 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xf4, 0x09, 0x0a, 0x0e, 0x50, 0x72, - 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x16, 0x0a, 0x06, - 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x50, 0x72, - 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, - 0x61, 0x64, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, - 0x64, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x06, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x6f, - 0x70, 0x4b, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x12, 0x16, - 0x0a, 0x06, 0x52, 0x65, 0x70, 0x65, 0x61, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, - 0x52, 0x65, 0x70, 0x65, 0x61, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x42, 0x61, 0x74, 0x63, 0x68, 0x18, - 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x42, 0x61, 0x74, 0x63, 0x68, 0x12, 0x14, 0x0a, 0x05, - 0x4e, 0x4b, 0x65, 0x65, 0x70, 0x18, 0x08, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x4e, 0x4b, 0x65, - 0x65, 0x70, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, - 0x65, 0x18, 0x09, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x54, 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, - 0x74, 0x75, 0x72, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, - 0x0a, 0x20, 0x01, 0x28, 0x02, 0x52, 0x07, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x14, - 0x0a, 0x05, 0x46, 0x31, 0x36, 0x4b, 0x56, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x46, - 0x31, 0x36, 0x4b, 0x56, 0x12, 0x1c, 0x0a, 0x09, 0x44, 0x65, 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64, - 0x65, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x44, 0x65, 0x62, 0x75, 0x67, 0x4d, 0x6f, - 0x64, 0x65, 0x12, 
0x20, 0x0a, 0x0b, 0x53, 0x74, 0x6f, 0x70, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, - 0x73, 0x18, 0x0d, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x53, 0x74, 0x6f, 0x70, 0x50, 0x72, 0x6f, - 0x6d, 0x70, 0x74, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x49, 0x67, 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f, - 0x53, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x49, 0x67, 0x6e, 0x6f, 0x72, 0x65, 0x45, - 0x4f, 0x53, 0x12, 0x2c, 0x0a, 0x11, 0x54, 0x61, 0x69, 0x6c, 0x46, 0x72, 0x65, 0x65, 0x53, 0x61, - 0x6d, 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x5a, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x11, 0x54, - 0x61, 0x69, 0x6c, 0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x5a, - 0x12, 0x1a, 0x0a, 0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61, 0x6c, 0x50, 0x18, 0x10, 0x20, 0x01, - 0x28, 0x02, 0x52, 0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61, 0x6c, 0x50, 0x12, 0x2a, 0x0a, 0x10, - 0x46, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, - 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x10, 0x46, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, - 0x79, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x28, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x73, - 0x65, 0x6e, 0x63, 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x12, 0x20, 0x01, 0x28, - 0x02, 0x52, 0x0f, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, - 0x74, 0x79, 0x12, 0x1a, 0x0a, 0x08, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x18, 0x13, - 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x12, 0x20, - 0x0a, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x45, 0x54, 0x41, 0x18, 0x14, 0x20, - 0x01, 0x28, 0x02, 0x52, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x45, 0x54, 0x41, - 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x54, 0x41, 0x55, 0x18, - 0x15, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x54, - 0x41, 0x55, 0x12, 0x1e, 0x0a, 0x0a, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c, - 0x18, 0x16, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65, - 0x4e, 0x4c, 0x12, 0x1c, 0x0a, 0x09, 0x4c, 0x6f, 0x67, 0x69, 0x74, 0x42, 0x69, 0x61, 0x73, 0x18, - 0x17, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x4c, 0x6f, 0x67, 0x69, 0x74, 0x42, 0x69, 0x61, 0x73, - 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, - 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x1a, - 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12, 0x26, 0x0a, 0x0e, 0x50, 0x72, - 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x41, 0x6c, 0x6c, 0x18, 0x1b, 0x20, 0x01, - 0x28, 0x08, 0x52, 0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x41, - 0x6c, 0x6c, 0x12, 0x24, 0x0a, 0x0d, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, - 0x65, 0x52, 0x4f, 0x18, 0x1c, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x50, 0x72, 0x6f, 0x6d, 0x70, - 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x52, 0x4f, 0x12, 0x18, 0x0a, 0x07, 0x47, 0x72, 0x61, 0x6d, - 0x6d, 0x61, 0x72, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d, - 0x61, 0x72, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x18, 0x1e, 0x20, + 0x07, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x22, 0x23, 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72, + 0x65, 0x73, 0x4b, 0x65, 0x79, 0x12, 0x16, 0x0a, 0x06, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x73, 0x18, + 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x06, 
0x46, 0x6c, 0x6f, 0x61, 0x74, 0x73, 0x22, 0x23, 0x0a, + 0x0b, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x14, 0x0a, 0x05, + 0x42, 0x79, 0x74, 0x65, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x05, 0x42, 0x79, 0x74, + 0x65, 0x73, 0x22, 0x68, 0x0a, 0x10, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, + 0x74, 0x6f, 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x12, 0x2c, + 0x0a, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, + 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x56, + 0x61, 0x6c, 0x75, 0x65, 0x52, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x22, 0x3d, 0x0a, 0x13, + 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x4f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, + 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x22, 0x3a, 0x0a, 0x10, 0x53, + 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, + 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, + 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x4b, 0x65, + 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x22, 0x67, 0x0a, 0x0f, 0x53, 0x74, 0x6f, 0x72, 0x65, + 0x73, 0x47, 0x65, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65, + 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, + 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65, + 0x79, 0x73, 0x12, 0x2c, 0x0a, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, + 0x72, 0x65, 0x73, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, + 0x22, 0x4d, 0x0a, 0x11, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x24, 0x0a, 0x03, 0x4b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, + 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x03, 0x4b, 0x65, 0x79, 0x12, 0x12, 0x0a, 0x04, 0x54, + 0x6f, 0x70, 0x4b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x22, + 0x8c, 0x01, 0x0a, 0x10, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x52, 0x65, + 0x73, 0x75, 0x6c, 0x74, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, + 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x12, 0x2c, 0x0a, 0x06, + 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x62, + 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x56, 0x61, 0x6c, + 0x75, 0x65, 0x52, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x53, 0x69, + 0x6d, 0x69, 0x6c, 0x61, 0x72, 0x69, 0x74, 0x69, 0x65, 0x73, 0x18, 
0x03, 0x20, 0x03, 0x28, 0x02, + 0x52, 0x0c, 0x53, 0x69, 0x6d, 0x69, 0x6c, 0x61, 0x72, 0x69, 0x74, 0x69, 0x65, 0x73, 0x22, 0x0f, + 0x0a, 0x0d, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, + 0xf4, 0x09, 0x0a, 0x0e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, + 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x18, + 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x54, 0x6f, 0x6b, 0x65, + 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, + 0x12, 0x12, 0x0a, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, + 0x54, 0x6f, 0x70, 0x4b, 0x12, 0x16, 0x0a, 0x06, 0x52, 0x65, 0x70, 0x65, 0x61, 0x74, 0x18, 0x06, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x52, 0x65, 0x70, 0x65, 0x61, 0x74, 0x12, 0x14, 0x0a, 0x05, + 0x42, 0x61, 0x74, 0x63, 0x68, 0x18, 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x42, 0x61, 0x74, + 0x63, 0x68, 0x12, 0x14, 0x0a, 0x05, 0x4e, 0x4b, 0x65, 0x65, 0x70, 0x18, 0x08, 0x20, 0x01, 0x28, + 0x05, 0x52, 0x05, 0x4e, 0x4b, 0x65, 0x65, 0x70, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6d, 0x70, + 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, 0x65, 0x18, 0x09, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x54, + 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x50, 0x65, + 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x02, 0x52, 0x07, 0x50, 0x65, 0x6e, + 0x61, 0x6c, 0x74, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x46, 0x31, 0x36, 0x4b, 0x56, 0x18, 0x0b, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x05, 0x46, 0x31, 0x36, 0x4b, 0x56, 0x12, 0x1c, 0x0a, 0x09, 0x44, 0x65, + 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64, 0x65, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x44, + 0x65, 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x53, 0x74, 0x6f, 0x70, + 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x73, 0x18, 0x0d, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x53, + 0x74, 0x6f, 0x70, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x49, 0x67, + 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f, 0x53, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x49, + 0x67, 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f, 0x53, 0x12, 0x2c, 0x0a, 0x11, 0x54, 0x61, 0x69, 0x6c, + 0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x5a, 0x18, 0x0f, 0x20, + 0x01, 0x28, 0x02, 0x52, 0x11, 0x54, 0x61, 0x69, 0x6c, 0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d, + 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x5a, 0x12, 0x1a, 0x0a, 0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61, + 0x6c, 0x50, 0x18, 0x10, 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61, + 0x6c, 0x50, 0x12, 0x2a, 0x0a, 0x10, 0x46, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50, + 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x10, 0x46, 0x72, + 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x28, + 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, + 0x79, 0x18, 0x12, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0f, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, + 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x1a, 0x0a, 0x08, 0x4d, 0x69, 0x72, 
0x6f, + 0x73, 0x74, 0x61, 0x74, 0x18, 0x13, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x4d, 0x69, 0x72, 0x6f, + 0x73, 0x74, 0x61, 0x74, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, + 0x45, 0x54, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, + 0x74, 0x61, 0x74, 0x45, 0x54, 0x41, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, + 0x61, 0x74, 0x54, 0x41, 0x55, 0x18, 0x15, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x4d, 0x69, 0x72, + 0x6f, 0x73, 0x74, 0x61, 0x74, 0x54, 0x41, 0x55, 0x12, 0x1e, 0x0a, 0x0a, 0x50, 0x65, 0x6e, 0x61, + 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c, 0x18, 0x16, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x50, 0x65, + 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c, 0x12, 0x1c, 0x0a, 0x09, 0x4c, 0x6f, 0x67, 0x69, + 0x74, 0x42, 0x69, 0x61, 0x73, 0x18, 0x17, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x4c, 0x6f, 0x67, + 0x69, 0x74, 0x42, 0x69, 0x61, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, + 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, + 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, + 0x12, 0x26, 0x0a, 0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x41, + 0x6c, 0x6c, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, + 0x43, 0x61, 0x63, 0x68, 0x65, 0x41, 0x6c, 0x6c, 0x12, 0x24, 0x0a, 0x0d, 0x50, 0x72, 0x6f, 0x6d, + 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x52, 0x4f, 0x18, 0x1c, 0x20, 0x01, 0x28, 0x08, 0x52, + 0x0d, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x52, 0x4f, 0x12, 0x18, + 0x0a, 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, + 0x47, 0x50, 0x55, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, + 0x50, 0x55, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, + 0x74, 0x18, 0x1f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, + 0x70, 0x6c, 0x69, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x6f, 0x70, 0x50, 0x18, 0x20, 0x20, 0x01, + 0x28, 0x02, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x50, 0x12, 0x28, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x6d, + 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x50, 0x61, 0x74, 0x68, 0x18, 0x21, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x0f, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x50, 0x61, + 0x74, 0x68, 0x12, 0x14, 0x0a, 0x05, 0x44, 0x65, 0x62, 0x75, 0x67, 0x18, 0x22, 0x20, 0x01, 0x28, + 0x08, 0x52, 0x05, 0x44, 0x65, 0x62, 0x75, 0x67, 0x12, 0x28, 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, + 0x64, 0x64, 0x69, 0x6e, 0x67, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x23, 0x20, 0x03, 0x28, + 0x05, 0x52, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x54, 0x6f, 0x6b, 0x65, + 0x6e, 0x73, 0x12, 0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, + 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, + 0x67, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, + 0x73, 0x65, 0x18, 0x25, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, + 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, + 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x26, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x52, + 0x6f, 0x70, 0x65, 
0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x30, 0x0a, 0x13, + 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x53, 0x63, + 0x61, 0x6c, 0x65, 0x18, 0x27, 0x20, 0x01, 0x28, 0x02, 0x52, 0x13, 0x4e, 0x65, 0x67, 0x61, 0x74, + 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x26, + 0x0a, 0x0e, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, + 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, + 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, + 0x18, 0x29, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, 0x12, 0x16, + 0x0a, 0x06, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x18, 0x2a, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, + 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x12, + 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, + 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xa2, 0x0d, 0x0a, 0x0c, 0x4d, 0x6f, + 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x6f, + 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, + 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, 0x65, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, + 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68, 0x12, 0x1c, + 0x0a, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x05, 0x20, 0x01, 0x28, + 0x08, 0x52, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12, 0x14, 0x0a, 0x05, + 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, 0x4c, 0x6f, + 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x07, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12, 0x1c, 0x0a, 0x09, 0x56, 0x6f, 0x63, 0x61, 0x62, 0x4f, + 0x6e, 0x6c, 0x79, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x56, 0x6f, 0x63, 0x61, 0x62, + 0x4f, 0x6e, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, 0x18, + 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, 0x12, 0x1e, + 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x0a, 0x20, 0x01, + 0x28, 0x08, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x12, 0x12, + 0x0a, 0x04, 0x4e, 0x55, 0x4d, 0x41, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4e, 0x55, + 0x4d, 0x41, 0x12, 0x1e, 0x0a, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65, 0x72, 0x73, + 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65, + 0x72, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12, 0x20, 0x0a, 0x0b, - 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x1f, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x12, 0x12, - 0x0a, 0x04, 0x54, 0x6f, 0x70, 0x50, 0x18, 
0x20, 0x20, 0x01, 0x28, 0x02, 0x52, 0x04, 0x54, 0x6f, - 0x70, 0x50, 0x12, 0x28, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, - 0x65, 0x50, 0x61, 0x74, 0x68, 0x18, 0x21, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x50, 0x72, 0x6f, - 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x50, 0x61, 0x74, 0x68, 0x12, 0x14, 0x0a, 0x05, - 0x44, 0x65, 0x62, 0x75, 0x67, 0x18, 0x22, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x44, 0x65, 0x62, - 0x75, 0x67, 0x12, 0x28, 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x54, - 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x23, 0x20, 0x03, 0x28, 0x05, 0x52, 0x0f, 0x45, 0x6d, 0x62, - 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x12, 0x1e, 0x0a, 0x0a, - 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x12, 0x22, 0x0a, 0x0c, - 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x18, 0x25, 0x20, 0x01, - 0x28, 0x02, 0x52, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, - 0x12, 0x24, 0x0a, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, - 0x65, 0x18, 0x26, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, - 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x30, 0x0a, 0x13, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, - 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x27, 0x20, - 0x01, 0x28, 0x02, 0x52, 0x13, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, - 0x6d, 0x70, 0x74, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x26, 0x0a, 0x0e, 0x4e, 0x65, 0x67, 0x61, - 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0e, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, - 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, 0x18, 0x29, 0x20, 0x01, 0x28, 0x05, - 0x52, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x49, 0x6d, 0x61, 0x67, - 0x65, 0x73, 0x18, 0x2a, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, - 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, - 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, - 0x61, 0x67, 0x65, 0x22, 0xa2, 0x0d, 0x0a, 0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, - 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, - 0x53, 0x65, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, - 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, - 0x52, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68, 0x12, 0x1c, 0x0a, 0x09, 0x46, 0x31, 0x36, 0x4d, - 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x46, 0x31, 0x36, - 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, - 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, - 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x07, 0x20, 0x01, 0x28, 0x08, 0x52, 
0x04, 0x4d, 0x4d, 0x61, 0x70, - 0x12, 0x1c, 0x0a, 0x09, 0x56, 0x6f, 0x63, 0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x18, 0x08, 0x20, - 0x01, 0x28, 0x08, 0x52, 0x09, 0x56, 0x6f, 0x63, 0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x12, 0x18, - 0x0a, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, 0x18, 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, - 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, 0x12, 0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, - 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x45, 0x6d, - 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x55, 0x4d, 0x41, - 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4e, 0x55, 0x4d, 0x41, 0x12, 0x1e, 0x0a, 0x0a, - 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65, 0x72, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, - 0x52, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65, 0x72, 0x73, 0x12, 0x18, 0x0a, 0x07, - 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, - 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, - 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, - 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, - 0x61, 0x64, 0x73, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, - 0x64, 0x73, 0x12, 0x2c, 0x0a, 0x11, 0x4c, 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, - 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x18, 0x10, 0x20, 0x01, 0x28, 0x09, 0x52, 0x11, 0x4c, - 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, - 0x12, 0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, - 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, - 0x42, 0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, - 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x12, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x52, 0x6f, 0x70, - 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x52, 0x4d, - 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x18, 0x13, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a, - 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x47, - 0x51, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x12, 0x1c, - 0x0a, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69, 0x6c, 0x65, 0x18, 0x15, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69, 0x6c, 0x65, 0x12, 0x16, 0x0a, 0x06, - 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x18, 0x16, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x44, 0x65, - 0x76, 0x69, 0x63, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, - 0x6e, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, - 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, - 0x61, 0x6d, 0x65, 0x18, 0x18, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, - 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x2a, 0x0a, 0x10, 0x55, 0x73, 0x65, 0x46, - 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x18, 0x19, 0x20, 0x01, - 0x28, 0x08, 0x52, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, - 0x69, 0x7a, 0x65, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 
0x65, - 0x54, 0x79, 0x70, 0x65, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x50, 0x69, 0x70, 0x65, - 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x24, 0x0a, 0x0d, 0x53, 0x63, 0x68, 0x65, - 0x64, 0x75, 0x6c, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x12, - 0x0a, 0x04, 0x43, 0x55, 0x44, 0x41, 0x18, 0x1c, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x43, 0x55, - 0x44, 0x41, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x46, 0x47, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x1d, - 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x43, 0x46, 0x47, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x18, - 0x0a, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x08, 0x52, - 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, 0x12, 0x1c, 0x0a, 0x09, 0x43, 0x4c, 0x49, 0x50, - 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x1f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x43, 0x4c, 0x49, - 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x24, 0x0a, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, - 0x62, 0x66, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x18, 0x20, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x43, - 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, - 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x21, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, - 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x12, 0x1e, 0x0a, 0x0a, 0x43, 0x6f, 0x6e, 0x74, - 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x18, 0x30, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x43, 0x6f, - 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x12, 0x1c, 0x0a, 0x09, 0x54, 0x6f, 0x6b, 0x65, - 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x18, 0x22, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x54, 0x6f, 0x6b, - 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, - 0x73, 0x65, 0x18, 0x23, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, - 0x73, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4c, 0x6f, 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, - 0x72, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x4c, 0x6f, 0x72, 0x61, 0x41, 0x64, 0x61, - 0x70, 0x74, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x4c, 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, - 0x65, 0x18, 0x2a, 0x20, 0x01, 0x28, 0x02, 0x52, 0x09, 0x4c, 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, - 0x6c, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x4e, 0x6f, 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x18, - 0x25, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x4e, 0x6f, 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, - 0x12, 0x1e, 0x0a, 0x0a, 0x44, 0x72, 0x61, 0x66, 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x27, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x44, 0x72, 0x61, 0x66, 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, - 0x12, 0x1c, 0x0a, 0x09, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x50, 0x61, 0x74, 0x68, 0x18, 0x26, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x09, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, - 0x0a, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x28, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61, 0x74, 0x69, - 0x6f, 0x6e, 0x12, 0x32, 0x0a, 0x14, 0x47, 0x50, 0x55, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, - 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x32, 0x20, 0x01, 0x28, 0x02, - 0x52, 0x14, 0x47, 0x50, 0x55, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, - 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x28, 0x0a, 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, - 0x65, 0x6d, 0x6f, 
0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x18, 0x33, 0x20, 0x01, 0x28, 0x08, 0x52, - 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, - 0x12, 0x22, 0x0a, 0x0c, 0x45, 0x6e, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, - 0x18, 0x34, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x45, 0x6e, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x45, - 0x61, 0x67, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x53, 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, - 0x65, 0x18, 0x35, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x53, 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, - 0x63, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x61, 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, - 0x6e, 0x18, 0x36, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x4d, 0x61, 0x78, 0x4d, 0x6f, 0x64, 0x65, - 0x6c, 0x4c, 0x65, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x4d, 0x4d, 0x50, 0x72, 0x6f, 0x6a, 0x18, 0x29, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x4d, 0x4d, 0x50, 0x72, 0x6f, 0x6a, 0x12, 0x20, 0x0a, 0x0b, - 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x18, 0x2b, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x12, 0x24, - 0x0a, 0x0d, 0x59, 0x61, 0x72, 0x6e, 0x45, 0x78, 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, - 0x2c, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x59, 0x61, 0x72, 0x6e, 0x45, 0x78, 0x74, 0x46, 0x61, - 0x63, 0x74, 0x6f, 0x72, 0x12, 0x26, 0x0a, 0x0e, 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, - 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2d, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0e, 0x59, 0x61, - 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x22, 0x0a, 0x0c, - 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x46, 0x61, 0x73, 0x74, 0x18, 0x2e, 0x20, 0x01, - 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x46, 0x61, 0x73, 0x74, - 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, - 0x18, 0x2f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, - 0x53, 0x6c, 0x6f, 0x77, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, 0x18, 0x31, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x04, 0x54, 0x79, 0x70, 0x65, 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, - 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, - 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, - 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, - 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62, - 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65, - 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x5b, 0x0a, 0x11, 0x54, 0x72, 0x61, - 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x10, - 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, - 0x12, 0x1a, 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, - 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74, - 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e, 0x0a, 0x10, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, - 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 
0x75, 0x6c, 0x74, 0x12, 0x36, 0x0a, 0x08, 0x73, 0x65, - 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, - 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, - 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x22, 0x77, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, - 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, - 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, - 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, - 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, - 0x65, 0x6e, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, - 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, - 0xbe, 0x02, 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, - 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x68, 0x65, 0x69, 0x67, - 0x68, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, - 0x12, 0x14, 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x12, 0x12, 0x0a, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x74, - 0x65, 0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x74, 0x65, 0x70, 0x12, 0x12, - 0x0a, 0x04, 0x73, 0x65, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x65, - 0x65, 0x64, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, - 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x73, - 0x69, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x6e, - 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x07, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, - 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x72, 0x63, 0x18, 0x09, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x72, 0x63, 0x12, 0x2a, 0x0a, 0x10, 0x45, 0x6e, 0x61, 0x62, - 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x18, 0x0a, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, - 0x74, 0x65, 0x72, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, - 0x18, 0x0b, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, - 0x22, 0x5e, 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, - 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, - 0x78, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x10, 0x0a, 
0x03, 0x64, 0x73, 0x74, 0x18, - 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x6f, - 0x69, 0x63, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, - 0x22, 0x46, 0x0a, 0x14, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, - 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, - 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, - 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xac, 0x01, 0x0a, 0x0f, 0x4d, 0x65, 0x6d, - 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, - 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, - 0x61, 0x6c, 0x12, 0x45, 0x0a, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x18, - 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, - 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x2e, - 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, - 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x1a, 0x3c, 0x0a, 0x0e, 0x42, 0x72, 0x65, - 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, - 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, - 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x76, 0x61, - 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xbc, 0x01, 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, - 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, - 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, - 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, - 0x30, 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, - 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, - 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x52, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, - 0x79, 0x22, 0x43, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, - 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, 0x5a, 0x45, 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, - 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x01, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, - 0x10, 0x02, 0x12, 0x12, 0x0a, 0x05, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x32, 0xf4, 0x04, 0x0a, 0x07, 0x42, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x12, 0x16, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, - 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, - 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, - 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, - 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 
0x63, - 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x35, 0x0a, 0x09, - 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x15, 0x2e, 0x62, 0x61, 0x63, 0x6b, - 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, - 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, - 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, - 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x30, - 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x17, - 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, + 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x12, 0x18, + 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x2c, 0x0a, 0x11, 0x4c, 0x69, 0x62, 0x72, + 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x18, 0x10, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x11, 0x4c, 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, + 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, + 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x52, 0x6f, + 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d, 0x52, 0x6f, + 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x12, 0x20, 0x01, 0x28, + 0x02, 0x52, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, + 0x12, 0x1e, 0x0a, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x18, 0x13, + 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, + 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, + 0x4e, 0x47, 0x51, 0x41, 0x12, 0x1c, 0x0a, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69, 0x6c, + 0x65, 0x18, 0x15, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69, + 0x6c, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x18, 0x16, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x55, 0x73, + 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x55, + 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x4d, 0x6f, 0x64, 0x65, + 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x18, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x2a, + 0x0a, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, + 0x65, 0x72, 0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73, + 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x50, 0x69, + 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x0c, 0x50, 
0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x24, + 0x0a, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x18, + 0x1b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, + 0x54, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x43, 0x55, 0x44, 0x41, 0x18, 0x1c, 0x20, 0x01, + 0x28, 0x08, 0x52, 0x04, 0x43, 0x55, 0x44, 0x41, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x46, 0x47, 0x53, + 0x63, 0x61, 0x6c, 0x65, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x43, 0x46, 0x47, 0x53, + 0x63, 0x61, 0x6c, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, 0x18, + 0x1e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, 0x12, 0x1c, + 0x0a, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x1f, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x24, 0x0a, 0x0d, + 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x18, 0x20, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64, + 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x21, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x12, 0x1e, + 0x0a, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x18, 0x30, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x12, 0x1c, + 0x0a, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x18, 0x22, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, + 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x18, 0x23, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, + 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4c, 0x6f, 0x72, 0x61, + 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x4c, + 0x6f, 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x4c, 0x6f, + 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x2a, 0x20, 0x01, 0x28, 0x02, 0x52, 0x09, 0x4c, + 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x4e, 0x6f, 0x4d, 0x75, + 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x18, 0x25, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x4e, 0x6f, 0x4d, + 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x12, 0x1e, 0x0a, 0x0a, 0x44, 0x72, 0x61, 0x66, 0x74, 0x4d, + 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x27, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x44, 0x72, 0x61, 0x66, + 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x1c, 0x0a, 0x09, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x50, + 0x61, 0x74, 0x68, 0x18, 0x26, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x41, 0x75, 0x64, 0x69, 0x6f, + 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61, + 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x51, 0x75, 0x61, 0x6e, + 0x74, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x32, 0x0a, 0x14, 0x47, 0x50, 0x55, 0x4d, + 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, + 0x18, 0x32, 0x20, 0x01, 0x28, 0x02, 0x52, 0x14, 0x47, 0x50, 0x55, 0x4d, 0x65, 0x6d, 0x6f, 0x72, + 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x28, 0x0a, 0x0f, + 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x18, + 0x33, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0f, 
0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, 0x6d, 0x6f, + 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x12, 0x22, 0x0a, 0x0c, 0x45, 0x6e, 0x66, 0x6f, 0x72, 0x63, + 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x18, 0x34, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x45, 0x6e, + 0x66, 0x6f, 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x53, 0x77, + 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x18, 0x35, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x53, + 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x61, 0x78, 0x4d, + 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x18, 0x36, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x4d, + 0x61, 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x4d, 0x4d, + 0x50, 0x72, 0x6f, 0x6a, 0x18, 0x29, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x4d, 0x4d, 0x50, 0x72, + 0x6f, 0x6a, 0x12, 0x20, 0x0a, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, 0x6c, 0x69, 0x6e, + 0x67, 0x18, 0x2b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, + 0x6c, 0x69, 0x6e, 0x67, 0x12, 0x24, 0x0a, 0x0d, 0x59, 0x61, 0x72, 0x6e, 0x45, 0x78, 0x74, 0x46, + 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2c, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x59, 0x61, 0x72, + 0x6e, 0x45, 0x78, 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x26, 0x0a, 0x0e, 0x59, 0x61, + 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2d, 0x20, 0x01, + 0x28, 0x02, 0x52, 0x0e, 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74, + 0x6f, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x46, 0x61, + 0x73, 0x74, 0x18, 0x2e, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, + 0x74, 0x61, 0x46, 0x61, 0x73, 0x74, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, + 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x18, 0x2f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, + 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x79, + 0x70, 0x65, 0x18, 0x31, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x54, 0x79, 0x70, 0x65, 0x22, 0x3c, + 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, + 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, + 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, 0x0a, 0x0f, + 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, + 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x01, 0x20, + 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x22, + 0x5b, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, + 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, + 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x04, 0x20, + 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e, 0x0a, 0x10, + 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, + 0x12, 0x36, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 
0x73, 0x18, 0x01, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, + 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x08, + 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x22, 0x77, 0x0a, 0x11, + 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, + 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x69, + 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, + 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, + 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x16, 0x0a, + 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, + 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xbe, 0x02, 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, + 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x16, + 0x0a, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, + 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x12, 0x12, 0x0a, 0x04, + 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f, 0x64, 0x65, + 0x12, 0x12, 0x0a, 0x04, 0x73, 0x74, 0x65, 0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, + 0x73, 0x74, 0x65, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x65, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x04, 0x73, 0x65, 0x65, 0x64, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f, 0x73, 0x69, + 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, + 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, + 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x6e, 0x65, 0x67, 0x61, + 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, + 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, + 0x73, 0x72, 0x63, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x72, 0x63, 0x12, 0x2a, + 0x0a, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, + 0x72, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, + 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, + 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, + 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x22, 0x5e, 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65, + 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x10, + 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 
0x74, + 0x12, 0x14, 0x0a, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x22, 0x46, 0x0a, 0x14, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, + 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, + 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, + 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, + 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xac, + 0x01, 0x0a, 0x0f, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, + 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x45, 0x0a, 0x09, 0x62, 0x72, 0x65, 0x61, + 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x62, 0x61, + 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, + 0x65, 0x44, 0x61, 0x74, 0x61, 0x2e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, + 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x1a, + 0x3c, 0x0a, 0x0e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e, 0x74, 0x72, + 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, + 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x04, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xbc, 0x01, + 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, + 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, + 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x30, 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, + 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x52, + 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x22, 0x43, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, + 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, 0x5a, 0x45, + 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x01, 0x12, 0x09, 0x0a, + 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, 0x10, 0x02, 0x12, 0x12, 0x0a, 0x05, 0x45, 0x52, 0x52, 0x4f, + 0x52, 0x10, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x32, 0xfb, 0x06, 0x0a, + 0x07, 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, + 0x74, 0x68, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, + 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, + 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x07, + 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, + 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, + 0x22, 0x00, 0x12, 
0x35, 0x0a, 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, + 0x15, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, + 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, 0x72, 0x65, + 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, + 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, + 0x70, 0x6c, 0x79, 0x22, 0x00, 0x30, 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64, + 0x64, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, + 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e, + 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, + 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, 0x65, 0x6e, + 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, 0x61, 0x63, + 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, + 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, + 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, 0x0a, 0x12, + 0x41, 0x75, 0x64, 0x69, 0x6f, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x12, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, + 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, + 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, + 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x03, 0x54, + 0x54, 0x53, 0x12, 0x13, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x54, 0x53, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, + 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4a, 0x0a, 0x0e, 0x54, 0x6f, + 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, + 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, + 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x3b, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, + 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, + 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x22, 0x00, 0x12, 0x39, 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, + 0x12, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, + 0x73, 0x53, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, + 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3f, + 0x0a, 0x0c, 0x53, 0x74, 0x6f, 0x72, 0x65, 
0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x12, 0x1c, + 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44, + 0x65, 0x6c, 0x65, 0x74, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, + 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, + 0x42, 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x12, 0x19, 0x2e, 0x62, + 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, - 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, - 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, - 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x47, - 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, - 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, 0x0a, 0x12, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x54, - 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, - 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, - 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x03, 0x54, 0x54, 0x53, 0x12, 0x13, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, - 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4a, 0x0a, 0x0e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, - 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, - 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, - 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, - 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, - 0x12, 0x3b, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, - 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, - 0x67, 0x65, 0x1a, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, - 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x42, 0x5a, 0x0a, - 0x19, 0x69, 0x6f, 0x2e, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, - 0x61, 0x69, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, - 0x6c, 0x41, 0x49, 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, - 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, - 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, - 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x33, + 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, + 0x74, 0x22, 0x00, 0x12, 0x45, 0x0a, 0x0a, 0x53, 0x74, 0x6f, 
0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, + 0x64, 0x12, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, + 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x19, 0x2e, + 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, + 0x6e, 0x64, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x42, 0x5a, 0x0a, 0x19, 0x69, 0x6f, + 0x2e, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, + 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, + 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, + 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, + 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, + 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -1999,55 +2470,80 @@ func file_backend_proto_rawDescGZIP() []byte { } var file_backend_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_backend_proto_msgTypes = make([]protoimpl.MessageInfo, 15) +var file_backend_proto_msgTypes = make([]protoimpl.MessageInfo, 23) var file_backend_proto_goTypes = []interface{}{ (StatusResponse_State)(0), // 0: backend.StatusResponse.State - (*HealthMessage)(nil), // 1: backend.HealthMessage - (*PredictOptions)(nil), // 2: backend.PredictOptions - (*Reply)(nil), // 3: backend.Reply - (*ModelOptions)(nil), // 4: backend.ModelOptions - (*Result)(nil), // 5: backend.Result - (*EmbeddingResult)(nil), // 6: backend.EmbeddingResult - (*TranscriptRequest)(nil), // 7: backend.TranscriptRequest - (*TranscriptResult)(nil), // 8: backend.TranscriptResult - (*TranscriptSegment)(nil), // 9: backend.TranscriptSegment - (*GenerateImageRequest)(nil), // 10: backend.GenerateImageRequest - (*TTSRequest)(nil), // 11: backend.TTSRequest - (*TokenizationResponse)(nil), // 12: backend.TokenizationResponse - (*MemoryUsageData)(nil), // 13: backend.MemoryUsageData - (*StatusResponse)(nil), // 14: backend.StatusResponse - nil, // 15: backend.MemoryUsageData.BreakdownEntry + (*StoresKey)(nil), // 1: backend.StoresKey + (*StoresValue)(nil), // 2: backend.StoresValue + (*StoresSetOptions)(nil), // 3: backend.StoresSetOptions + (*StoresDeleteOptions)(nil), // 4: backend.StoresDeleteOptions + (*StoresGetOptions)(nil), // 5: backend.StoresGetOptions + (*StoresGetResult)(nil), // 6: backend.StoresGetResult + (*StoresFindOptions)(nil), // 7: backend.StoresFindOptions + (*StoresFindResult)(nil), // 8: backend.StoresFindResult + (*HealthMessage)(nil), // 9: backend.HealthMessage + (*PredictOptions)(nil), // 10: backend.PredictOptions + (*Reply)(nil), // 11: backend.Reply + (*ModelOptions)(nil), // 12: backend.ModelOptions + (*Result)(nil), // 13: backend.Result + (*EmbeddingResult)(nil), // 14: backend.EmbeddingResult + (*TranscriptRequest)(nil), // 15: backend.TranscriptRequest + (*TranscriptResult)(nil), // 16: backend.TranscriptResult + (*TranscriptSegment)(nil), // 17: backend.TranscriptSegment + (*GenerateImageRequest)(nil), // 18: backend.GenerateImageRequest + (*TTSRequest)(nil), // 19: backend.TTSRequest + (*TokenizationResponse)(nil), // 20: backend.TokenizationResponse + (*MemoryUsageData)(nil), // 21: backend.MemoryUsageData + (*StatusResponse)(nil), // 22: backend.StatusResponse + nil, // 23: backend.MemoryUsageData.BreakdownEntry } var file_backend_proto_depIdxs = []int32{ - 
9, // 0: backend.TranscriptResult.segments:type_name -> backend.TranscriptSegment - 15, // 1: backend.MemoryUsageData.breakdown:type_name -> backend.MemoryUsageData.BreakdownEntry - 0, // 2: backend.StatusResponse.state:type_name -> backend.StatusResponse.State - 13, // 3: backend.StatusResponse.memory:type_name -> backend.MemoryUsageData - 1, // 4: backend.Backend.Health:input_type -> backend.HealthMessage - 2, // 5: backend.Backend.Predict:input_type -> backend.PredictOptions - 4, // 6: backend.Backend.LoadModel:input_type -> backend.ModelOptions - 2, // 7: backend.Backend.PredictStream:input_type -> backend.PredictOptions - 2, // 8: backend.Backend.Embedding:input_type -> backend.PredictOptions - 10, // 9: backend.Backend.GenerateImage:input_type -> backend.GenerateImageRequest - 7, // 10: backend.Backend.AudioTranscription:input_type -> backend.TranscriptRequest - 11, // 11: backend.Backend.TTS:input_type -> backend.TTSRequest - 2, // 12: backend.Backend.TokenizeString:input_type -> backend.PredictOptions - 1, // 13: backend.Backend.Status:input_type -> backend.HealthMessage - 3, // 14: backend.Backend.Health:output_type -> backend.Reply - 3, // 15: backend.Backend.Predict:output_type -> backend.Reply - 5, // 16: backend.Backend.LoadModel:output_type -> backend.Result - 3, // 17: backend.Backend.PredictStream:output_type -> backend.Reply - 6, // 18: backend.Backend.Embedding:output_type -> backend.EmbeddingResult - 5, // 19: backend.Backend.GenerateImage:output_type -> backend.Result - 8, // 20: backend.Backend.AudioTranscription:output_type -> backend.TranscriptResult - 5, // 21: backend.Backend.TTS:output_type -> backend.Result - 12, // 22: backend.Backend.TokenizeString:output_type -> backend.TokenizationResponse - 14, // 23: backend.Backend.Status:output_type -> backend.StatusResponse - 14, // [14:24] is the sub-list for method output_type - 4, // [4:14] is the sub-list for method input_type - 4, // [4:4] is the sub-list for extension type_name - 4, // [4:4] is the sub-list for extension extendee - 0, // [0:4] is the sub-list for field type_name + 1, // 0: backend.StoresSetOptions.Keys:type_name -> backend.StoresKey + 2, // 1: backend.StoresSetOptions.Values:type_name -> backend.StoresValue + 1, // 2: backend.StoresDeleteOptions.Keys:type_name -> backend.StoresKey + 1, // 3: backend.StoresGetOptions.Keys:type_name -> backend.StoresKey + 1, // 4: backend.StoresGetResult.Keys:type_name -> backend.StoresKey + 2, // 5: backend.StoresGetResult.Values:type_name -> backend.StoresValue + 1, // 6: backend.StoresFindOptions.Key:type_name -> backend.StoresKey + 1, // 7: backend.StoresFindResult.Keys:type_name -> backend.StoresKey + 2, // 8: backend.StoresFindResult.Values:type_name -> backend.StoresValue + 17, // 9: backend.TranscriptResult.segments:type_name -> backend.TranscriptSegment + 23, // 10: backend.MemoryUsageData.breakdown:type_name -> backend.MemoryUsageData.BreakdownEntry + 0, // 11: backend.StatusResponse.state:type_name -> backend.StatusResponse.State + 21, // 12: backend.StatusResponse.memory:type_name -> backend.MemoryUsageData + 9, // 13: backend.Backend.Health:input_type -> backend.HealthMessage + 10, // 14: backend.Backend.Predict:input_type -> backend.PredictOptions + 12, // 15: backend.Backend.LoadModel:input_type -> backend.ModelOptions + 10, // 16: backend.Backend.PredictStream:input_type -> backend.PredictOptions + 10, // 17: backend.Backend.Embedding:input_type -> backend.PredictOptions + 18, // 18: backend.Backend.GenerateImage:input_type -> 
backend.GenerateImageRequest + 15, // 19: backend.Backend.AudioTranscription:input_type -> backend.TranscriptRequest + 19, // 20: backend.Backend.TTS:input_type -> backend.TTSRequest + 10, // 21: backend.Backend.TokenizeString:input_type -> backend.PredictOptions + 9, // 22: backend.Backend.Status:input_type -> backend.HealthMessage + 3, // 23: backend.Backend.StoresSet:input_type -> backend.StoresSetOptions + 4, // 24: backend.Backend.StoresDelete:input_type -> backend.StoresDeleteOptions + 5, // 25: backend.Backend.StoresGet:input_type -> backend.StoresGetOptions + 7, // 26: backend.Backend.StoresFind:input_type -> backend.StoresFindOptions + 11, // 27: backend.Backend.Health:output_type -> backend.Reply + 11, // 28: backend.Backend.Predict:output_type -> backend.Reply + 13, // 29: backend.Backend.LoadModel:output_type -> backend.Result + 11, // 30: backend.Backend.PredictStream:output_type -> backend.Reply + 14, // 31: backend.Backend.Embedding:output_type -> backend.EmbeddingResult + 13, // 32: backend.Backend.GenerateImage:output_type -> backend.Result + 16, // 33: backend.Backend.AudioTranscription:output_type -> backend.TranscriptResult + 13, // 34: backend.Backend.TTS:output_type -> backend.Result + 20, // 35: backend.Backend.TokenizeString:output_type -> backend.TokenizationResponse + 22, // 36: backend.Backend.Status:output_type -> backend.StatusResponse + 13, // 37: backend.Backend.StoresSet:output_type -> backend.Result + 13, // 38: backend.Backend.StoresDelete:output_type -> backend.Result + 6, // 39: backend.Backend.StoresGet:output_type -> backend.StoresGetResult + 8, // 40: backend.Backend.StoresFind:output_type -> backend.StoresFindResult + 27, // [27:41] is the sub-list for method output_type + 13, // [13:27] is the sub-list for method input_type + 13, // [13:13] is the sub-list for extension type_name + 13, // [13:13] is the sub-list for extension extendee + 0, // [0:13] is the sub-list for field type_name } func init() { file_backend_proto_init() } @@ -2057,7 +2553,7 @@ func file_backend_proto_init() { } if !protoimpl.UnsafeEnabled { file_backend_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*HealthMessage); i { + switch v := v.(*StoresKey); i { case 0: return &v.state case 1: @@ -2069,7 +2565,7 @@ func file_backend_proto_init() { } } file_backend_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PredictOptions); i { + switch v := v.(*StoresValue); i { case 0: return &v.state case 1: @@ -2081,7 +2577,7 @@ func file_backend_proto_init() { } } file_backend_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*Reply); i { + switch v := v.(*StoresSetOptions); i { case 0: return &v.state case 1: @@ -2093,7 +2589,7 @@ func file_backend_proto_init() { } } file_backend_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*ModelOptions); i { + switch v := v.(*StoresDeleteOptions); i { case 0: return &v.state case 1: @@ -2105,7 +2601,7 @@ func file_backend_proto_init() { } } file_backend_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*Result); i { + switch v := v.(*StoresGetOptions); i { case 0: return &v.state case 1: @@ -2117,7 +2613,7 @@ func file_backend_proto_init() { } } file_backend_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*EmbeddingResult); i { + switch v := v.(*StoresGetResult); i { case 0: return &v.state case 1: @@ -2129,7 +2625,7 @@ func 
file_backend_proto_init() { } } file_backend_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*TranscriptRequest); i { + switch v := v.(*StoresFindOptions); i { case 0: return &v.state case 1: @@ -2141,7 +2637,7 @@ func file_backend_proto_init() { } } file_backend_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*TranscriptResult); i { + switch v := v.(*StoresFindResult); i { case 0: return &v.state case 1: @@ -2153,7 +2649,7 @@ func file_backend_proto_init() { } } file_backend_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*TranscriptSegment); i { + switch v := v.(*HealthMessage); i { case 0: return &v.state case 1: @@ -2165,7 +2661,7 @@ func file_backend_proto_init() { } } file_backend_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GenerateImageRequest); i { + switch v := v.(*PredictOptions); i { case 0: return &v.state case 1: @@ -2177,7 +2673,7 @@ func file_backend_proto_init() { } } file_backend_proto_msgTypes[10].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*TTSRequest); i { + switch v := v.(*Reply); i { case 0: return &v.state case 1: @@ -2189,7 +2685,7 @@ func file_backend_proto_init() { } } file_backend_proto_msgTypes[11].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*TokenizationResponse); i { + switch v := v.(*ModelOptions); i { case 0: return &v.state case 1: @@ -2201,7 +2697,7 @@ func file_backend_proto_init() { } } file_backend_proto_msgTypes[12].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*MemoryUsageData); i { + switch v := v.(*Result); i { case 0: return &v.state case 1: @@ -2213,6 +2709,102 @@ func file_backend_proto_init() { } } file_backend_proto_msgTypes[13].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*EmbeddingResult); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_backend_proto_msgTypes[14].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*TranscriptRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_backend_proto_msgTypes[15].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*TranscriptResult); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_backend_proto_msgTypes[16].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*TranscriptSegment); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_backend_proto_msgTypes[17].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GenerateImageRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_backend_proto_msgTypes[18].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*TTSRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_backend_proto_msgTypes[19].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*TokenizationResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return 
&v.unknownFields + default: + return nil + } + } + file_backend_proto_msgTypes[20].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*MemoryUsageData); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_backend_proto_msgTypes[21].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*StatusResponse); i { case 0: return &v.state @@ -2231,7 +2823,7 @@ func file_backend_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_backend_proto_rawDesc, NumEnums: 1, - NumMessages: 15, + NumMessages: 23, NumExtensions: 0, NumServices: 1, }, diff --git a/pkg/grpc/proto/backend_grpc.pb.go b/pkg/grpc/proto/backend_grpc.pb.go index ef5187bc..0314cd4e 100644 --- a/pkg/grpc/proto/backend_grpc.pb.go +++ b/pkg/grpc/proto/backend_grpc.pb.go @@ -1,6 +1,6 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: -// - protoc-gen-go-grpc v1.2.0 +// - protoc-gen-go-grpc v1.3.0 // - protoc v4.23.4 // source: backend.proto @@ -18,6 +18,23 @@ import ( // Requires gRPC-Go v1.32.0 or later. const _ = grpc.SupportPackageIsVersion7 +const ( + Backend_Health_FullMethodName = "/backend.Backend/Health" + Backend_Predict_FullMethodName = "/backend.Backend/Predict" + Backend_LoadModel_FullMethodName = "/backend.Backend/LoadModel" + Backend_PredictStream_FullMethodName = "/backend.Backend/PredictStream" + Backend_Embedding_FullMethodName = "/backend.Backend/Embedding" + Backend_GenerateImage_FullMethodName = "/backend.Backend/GenerateImage" + Backend_AudioTranscription_FullMethodName = "/backend.Backend/AudioTranscription" + Backend_TTS_FullMethodName = "/backend.Backend/TTS" + Backend_TokenizeString_FullMethodName = "/backend.Backend/TokenizeString" + Backend_Status_FullMethodName = "/backend.Backend/Status" + Backend_StoresSet_FullMethodName = "/backend.Backend/StoresSet" + Backend_StoresDelete_FullMethodName = "/backend.Backend/StoresDelete" + Backend_StoresGet_FullMethodName = "/backend.Backend/StoresGet" + Backend_StoresFind_FullMethodName = "/backend.Backend/StoresFind" +) + // BackendClient is the client API for Backend service. // // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. @@ -32,6 +49,10 @@ type BackendClient interface { TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) + StoresSet(ctx context.Context, in *StoresSetOptions, opts ...grpc.CallOption) (*Result, error) + StoresDelete(ctx context.Context, in *StoresDeleteOptions, opts ...grpc.CallOption) (*Result, error) + StoresGet(ctx context.Context, in *StoresGetOptions, opts ...grpc.CallOption) (*StoresGetResult, error) + StoresFind(ctx context.Context, in *StoresFindOptions, opts ...grpc.CallOption) (*StoresFindResult, error) } type backendClient struct { @@ -44,7 +65,7 @@ func NewBackendClient(cc grpc.ClientConnInterface) BackendClient { func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) { out := new(Reply) - err := c.cc.Invoke(ctx, "/backend.Backend/Health", in, out, opts...) + err := c.cc.Invoke(ctx, Backend_Health_FullMethodName, in, out, opts...) 
if err != nil { return nil, err } @@ -53,7 +74,7 @@ func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...g func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) { out := new(Reply) - err := c.cc.Invoke(ctx, "/backend.Backend/Predict", in, out, opts...) + err := c.cc.Invoke(ctx, Backend_Predict_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -62,7 +83,7 @@ func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts .. func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) { out := new(Result) - err := c.cc.Invoke(ctx, "/backend.Backend/LoadModel", in, out, opts...) + err := c.cc.Invoke(ctx, Backend_LoadModel_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -70,7 +91,7 @@ func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts .. } func (c *backendClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) { - stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], "/backend.Backend/PredictStream", opts...) + stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], Backend_PredictStream_FullMethodName, opts...) if err != nil { return nil, err } @@ -103,7 +124,7 @@ func (x *backendPredictStreamClient) Recv() (*Reply, error) { func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) { out := new(EmbeddingResult) - err := c.cc.Invoke(ctx, "/backend.Backend/Embedding", in, out, opts...) + err := c.cc.Invoke(ctx, Backend_Embedding_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -112,7 +133,7 @@ func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) { out := new(Result) - err := c.cc.Invoke(ctx, "/backend.Backend/GenerateImage", in, out, opts...) + err := c.cc.Invoke(ctx, Backend_GenerateImage_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -121,7 +142,7 @@ func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequ func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) { out := new(TranscriptResult) - err := c.cc.Invoke(ctx, "/backend.Backend/AudioTranscription", in, out, opts...) + err := c.cc.Invoke(ctx, Backend_AudioTranscription_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -130,7 +151,7 @@ func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRe func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) { out := new(Result) - err := c.cc.Invoke(ctx, "/backend.Backend/TTS", in, out, opts...) + err := c.cc.Invoke(ctx, Backend_TTS_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -139,7 +160,7 @@ func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.Ca func (c *backendClient) TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) { out := new(TokenizationResponse) - err := c.cc.Invoke(ctx, "/backend.Backend/TokenizeString", in, out, opts...) 
+ err := c.cc.Invoke(ctx, Backend_TokenizeString_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -148,7 +169,43 @@ func (c *backendClient) TokenizeString(ctx context.Context, in *PredictOptions, func (c *backendClient) Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) { out := new(StatusResponse) - err := c.cc.Invoke(ctx, "/backend.Backend/Status", in, out, opts...) + err := c.cc.Invoke(ctx, Backend_Status_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *backendClient) StoresSet(ctx context.Context, in *StoresSetOptions, opts ...grpc.CallOption) (*Result, error) { + out := new(Result) + err := c.cc.Invoke(ctx, Backend_StoresSet_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *backendClient) StoresDelete(ctx context.Context, in *StoresDeleteOptions, opts ...grpc.CallOption) (*Result, error) { + out := new(Result) + err := c.cc.Invoke(ctx, Backend_StoresDelete_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *backendClient) StoresGet(ctx context.Context, in *StoresGetOptions, opts ...grpc.CallOption) (*StoresGetResult, error) { + out := new(StoresGetResult) + err := c.cc.Invoke(ctx, Backend_StoresGet_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *backendClient) StoresFind(ctx context.Context, in *StoresFindOptions, opts ...grpc.CallOption) (*StoresFindResult, error) { + out := new(StoresFindResult) + err := c.cc.Invoke(ctx, Backend_StoresFind_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -169,6 +226,10 @@ type BackendServer interface { TTS(context.Context, *TTSRequest) (*Result, error) TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error) Status(context.Context, *HealthMessage) (*StatusResponse, error) + StoresSet(context.Context, *StoresSetOptions) (*Result, error) + StoresDelete(context.Context, *StoresDeleteOptions) (*Result, error) + StoresGet(context.Context, *StoresGetOptions) (*StoresGetResult, error) + StoresFind(context.Context, *StoresFindOptions) (*StoresFindResult, error) mustEmbedUnimplementedBackendServer() } @@ -206,6 +267,18 @@ func (UnimplementedBackendServer) TokenizeString(context.Context, *PredictOption func (UnimplementedBackendServer) Status(context.Context, *HealthMessage) (*StatusResponse, error) { return nil, status.Errorf(codes.Unimplemented, "method Status not implemented") } +func (UnimplementedBackendServer) StoresSet(context.Context, *StoresSetOptions) (*Result, error) { + return nil, status.Errorf(codes.Unimplemented, "method StoresSet not implemented") +} +func (UnimplementedBackendServer) StoresDelete(context.Context, *StoresDeleteOptions) (*Result, error) { + return nil, status.Errorf(codes.Unimplemented, "method StoresDelete not implemented") +} +func (UnimplementedBackendServer) StoresGet(context.Context, *StoresGetOptions) (*StoresGetResult, error) { + return nil, status.Errorf(codes.Unimplemented, "method StoresGet not implemented") +} +func (UnimplementedBackendServer) StoresFind(context.Context, *StoresFindOptions) (*StoresFindResult, error) { + return nil, status.Errorf(codes.Unimplemented, "method StoresFind not implemented") +} func (UnimplementedBackendServer) mustEmbedUnimplementedBackendServer() {} // UnsafeBackendServer may be embedded to opt out of forward compatibility for this service. 
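[Editor's note: an illustrative sketch, not part of the patch. With the new Stores RPCs added to BackendClient above, a caller could set and read back vector-keyed entries roughly as below. The Keys/Values fields on the options messages and StoresKey's Floats field are taken from this diff; the dial address, the StoresValue field name (Bytes), and a running backend are assumptions. Note also that the Backend_*_FullMethodName constants introduced in this hunk are what protoc-gen-go-grpc v1.3.0 generates in place of hard-coded method strings, so interceptors can match on them instead of string literals.]

package main

import (
	"context"
	"log"

	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

func main() {
	// Dial a locally running backend; the address is an assumption for this sketch.
	conn, err := grpc.Dial("127.0.0.1:50051", grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()
	client := pb.NewBackendClient(conn)

	// Store one float32 vector key with an opaque byte value.
	keys := []*pb.StoresKey{{Floats: []float32{0.1, 0.2, 0.3}}}
	values := []*pb.StoresValue{{Bytes: []byte("hello")}} // the Bytes field name is assumed
	if _, err := client.StoresSet(context.Background(), &pb.StoresSetOptions{Keys: keys, Values: values}); err != nil {
		log.Fatal(err)
	}

	// Read the value back by the same key.
	res, err := client.StoresGet(context.Background(), &pb.StoresGetOptions{Keys: keys})
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("got %d value(s) back", len(res.Values))
}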
@@ -229,7 +302,7 @@ func _Backend_Health_Handler(srv interface{}, ctx context.Context, dec func(inte } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/backend.Backend/Health", + FullMethod: Backend_Health_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(BackendServer).Health(ctx, req.(*HealthMessage)) @@ -247,7 +320,7 @@ func _Backend_Predict_Handler(srv interface{}, ctx context.Context, dec func(int } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/backend.Backend/Predict", + FullMethod: Backend_Predict_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(BackendServer).Predict(ctx, req.(*PredictOptions)) @@ -265,7 +338,7 @@ func _Backend_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(i } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/backend.Backend/LoadModel", + FullMethod: Backend_LoadModel_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(BackendServer).LoadModel(ctx, req.(*ModelOptions)) @@ -304,7 +377,7 @@ func _Backend_Embedding_Handler(srv interface{}, ctx context.Context, dec func(i } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/backend.Backend/Embedding", + FullMethod: Backend_Embedding_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(BackendServer).Embedding(ctx, req.(*PredictOptions)) @@ -322,7 +395,7 @@ func _Backend_GenerateImage_Handler(srv interface{}, ctx context.Context, dec fu } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/backend.Backend/GenerateImage", + FullMethod: Backend_GenerateImage_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(BackendServer).GenerateImage(ctx, req.(*GenerateImageRequest)) @@ -340,7 +413,7 @@ func _Backend_AudioTranscription_Handler(srv interface{}, ctx context.Context, d } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/backend.Backend/AudioTranscription", + FullMethod: Backend_AudioTranscription_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(BackendServer).AudioTranscription(ctx, req.(*TranscriptRequest)) @@ -358,7 +431,7 @@ func _Backend_TTS_Handler(srv interface{}, ctx context.Context, dec func(interfa } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/backend.Backend/TTS", + FullMethod: Backend_TTS_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(BackendServer).TTS(ctx, req.(*TTSRequest)) @@ -376,7 +449,7 @@ func _Backend_TokenizeString_Handler(srv interface{}, ctx context.Context, dec f } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/backend.Backend/TokenizeString", + FullMethod: Backend_TokenizeString_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(BackendServer).TokenizeString(ctx, req.(*PredictOptions)) @@ -394,7 +467,7 @@ func _Backend_Status_Handler(srv interface{}, ctx context.Context, dec func(inte } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/backend.Backend/Status", + FullMethod: Backend_Status_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(BackendServer).Status(ctx, req.(*HealthMessage)) @@ -402,6 +475,78 @@ func _Backend_Status_Handler(srv interface{}, ctx context.Context, dec 
func(inte return interceptor(ctx, in, info, handler) } +func _Backend_StoresSet_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(StoresSetOptions) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(BackendServer).StoresSet(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Backend_StoresSet_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(BackendServer).StoresSet(ctx, req.(*StoresSetOptions)) + } + return interceptor(ctx, in, info, handler) +} + +func _Backend_StoresDelete_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(StoresDeleteOptions) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(BackendServer).StoresDelete(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Backend_StoresDelete_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(BackendServer).StoresDelete(ctx, req.(*StoresDeleteOptions)) + } + return interceptor(ctx, in, info, handler) +} + +func _Backend_StoresGet_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(StoresGetOptions) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(BackendServer).StoresGet(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Backend_StoresGet_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(BackendServer).StoresGet(ctx, req.(*StoresGetOptions)) + } + return interceptor(ctx, in, info, handler) +} + +func _Backend_StoresFind_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(StoresFindOptions) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(BackendServer).StoresFind(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Backend_StoresFind_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(BackendServer).StoresFind(ctx, req.(*StoresFindOptions)) + } + return interceptor(ctx, in, info, handler) +} + // Backend_ServiceDesc is the grpc.ServiceDesc for Backend service. 
// It's only intended for direct use with grpc.RegisterService, // and not to be introspected or modified (even as a copy) @@ -445,6 +590,22 @@ var Backend_ServiceDesc = grpc.ServiceDesc{ MethodName: "Status", Handler: _Backend_Status_Handler, }, + { + MethodName: "StoresSet", + Handler: _Backend_StoresSet_Handler, + }, + { + MethodName: "StoresDelete", + Handler: _Backend_StoresDelete_Handler, + }, + { + MethodName: "StoresGet", + Handler: _Backend_StoresGet_Handler, + }, + { + MethodName: "StoresFind", + Handler: _Backend_StoresFind_Handler, + }, }, Streams: []grpc.StreamDesc{ { diff --git a/pkg/grpc/server.go b/pkg/grpc/server.go index 07d055d9..8116241f 100644 --- a/pkg/grpc/server.go +++ b/pkg/grpc/server.go @@ -167,6 +167,54 @@ func (s *server) Status(ctx context.Context, in *pb.HealthMessage) (*pb.StatusRe return &res, nil } +func (s *server) StoresSet(ctx context.Context, in *pb.StoresSetOptions) (*pb.Result, error) { + if s.llm.Locking() { + s.llm.Lock() + defer s.llm.Unlock() + } + err := s.llm.StoresSet(in) + if err != nil { + return &pb.Result{Message: fmt.Sprintf("Error setting entry: %s", err.Error()), Success: false}, err + } + return &pb.Result{Message: "Set key", Success: true}, nil +} + +func (s *server) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions) (*pb.Result, error) { + if s.llm.Locking() { + s.llm.Lock() + defer s.llm.Unlock() + } + err := s.llm.StoresDelete(in) + if err != nil { + return &pb.Result{Message: fmt.Sprintf("Error deleting entry: %s", err.Error()), Success: false}, err + } + return &pb.Result{Message: "Deleted key", Success: true}, nil +} + +func (s *server) StoresGet(ctx context.Context, in *pb.StoresGetOptions) (*pb.StoresGetResult, error) { + if s.llm.Locking() { + s.llm.Lock() + defer s.llm.Unlock() + } + res, err := s.llm.StoresGet(in) + if err != nil { + return nil, err + } + return &res, nil +} + +func (s *server) StoresFind(ctx context.Context, in *pb.StoresFindOptions) (*pb.StoresFindResult, error) { + if s.llm.Locking() { + s.llm.Lock() + defer s.llm.Unlock() + } + res, err := s.llm.StoresFind(in) + if err != nil { + return nil, err + } + return &res, nil +} + func StartServer(address string, model LLM) error { lis, err := net.Listen("tcp", address) if err != nil { diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index a6a84fd7..85744f9a 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -17,6 +17,7 @@ import ( var Aliases map[string]string = map[string]string{ "go-llama": LLamaCPP, "llama": LLamaCPP, + "embedded-store": LocalStoreBackend, } const ( @@ -34,6 +35,8 @@ const ( TinyDreamBackend = "tinydream" PiperBackend = "piper" LCHuggingFaceBackend = "langchain-huggingface" + + LocalStoreBackend = "local-store" ) var AutoLoadBackends []string = []string{ diff --git a/pkg/store/client.go b/pkg/store/client.go new file mode 100644 index 00000000..8facfdcf --- /dev/null +++ b/pkg/store/client.go @@ -0,0 +1,155 @@ +package store + +import ( + "context" + "fmt" + + grpc "github.com/go-skynet/LocalAI/pkg/grpc" + "github.com/go-skynet/LocalAI/pkg/grpc/proto" +) + +// Wrapper for the GRPC client so that simple use cases are handled without verbosity + +// SetCols sets multiple key-value pairs in the store +// It's in columnar format so that keys[i] is associated with values[i] +func SetCols(ctx context.Context, c grpc.Backend, keys [][]float32, values [][]byte) error { + protoKeys := make([]*proto.StoresKey, len(keys)) + for i, k := range keys { + protoKeys[i] = &proto.StoresKey{ + Floats: k, + } 
+ } + protoValues := make([]*proto.StoresValue, len(values)) + for i, v := range values { + protoValues[i] = &proto.StoresValue{ + Bytes: v, + } + } + setOpts := &proto.StoresSetOptions{ + Keys: protoKeys, + Values: protoValues, + } + + res, err := c.StoresSet(ctx, setOpts) + if err != nil { + return err + } + + if res.Success { + return nil + } + + return fmt.Errorf("failed to set keys: %v", res.Message) +} + +// SetSingle sets a single key-value pair in the store +// Don't call this in a tight loop, instead use SetCols +func SetSingle(ctx context.Context, c grpc.Backend, key []float32, value []byte) error { + return SetCols(ctx, c, [][]float32{key}, [][]byte{value}) +} + +// DeleteCols deletes multiple key-value pairs from the store +// It's in columnar format so that keys[i] is associated with values[i] +func DeleteCols(ctx context.Context, c grpc.Backend, keys [][]float32) error { + protoKeys := make([]*proto.StoresKey, len(keys)) + for i, k := range keys { + protoKeys[i] = &proto.StoresKey{ + Floats: k, + } + } + deleteOpts := &proto.StoresDeleteOptions{ + Keys: protoKeys, + } + + res, err := c.StoresDelete(ctx, deleteOpts) + if err != nil { + return err + } + + if res.Success { + return nil + } + + return fmt.Errorf("failed to delete keys: %v", res.Message) +} + +// DeleteSingle deletes a single key-value pair from the store +// Don't call this in a tight loop, instead use DeleteCols +func DeleteSingle(ctx context.Context, c grpc.Backend, key []float32) error { + return DeleteCols(ctx, c, [][]float32{key}) +} + +// GetCols gets multiple key-value pairs from the store +// It's in columnar format so that keys[i] is associated with values[i] +// Be warned the keys are sorted and will be returned in a different order than they were input +// There is no guarantee as to how the keys are sorted +func GetCols(ctx context.Context, c grpc.Backend, keys [][]float32) ([][]float32, [][]byte, error) { + protoKeys := make([]*proto.StoresKey, len(keys)) + for i, k := range keys { + protoKeys[i] = &proto.StoresKey{ + Floats: k, + } + } + getOpts := &proto.StoresGetOptions{ + Keys: protoKeys, + } + + res, err := c.StoresGet(ctx, getOpts) + if err != nil { + return nil, nil, err + } + + ks := make([][]float32, len(res.Keys)) + for i, k := range res.Keys { + ks[i] = k.Floats + } + vs := make([][]byte, len(res.Values)) + for i, v := range res.Values { + vs[i] = v.Bytes + } + + return ks, vs, nil +} + +// GetSingle gets a single key-value pair from the store +// Don't call this in a tight loop, instead use GetCols +func GetSingle(ctx context.Context, c grpc.Backend, key []float32) ([]byte, error) { + _, values, err := GetCols(ctx, c, [][]float32{key}) + if err != nil { + return nil, err + } + + if len(values) > 0 { + return values[0], nil + } + + return nil, fmt.Errorf("failed to get key") +} + +// Find similar keys to the given key. 
Returns the keys, values, and similarities +func Find(ctx context.Context, c grpc.Backend, key []float32, topk int) ([][]float32, [][]byte, []float32, error) { + findOpts := &proto.StoresFindOptions{ + Key: &proto.StoresKey{ + Floats: key, + }, + TopK: int32(topk), + } + + res, err := c.StoresFind(ctx, findOpts) + if err != nil { + return nil, nil, nil, err + } + + ks := make([][]float32, len(res.Keys)) + vs := make([][]byte, len(res.Values)) + + for i, k := range res.Keys { + ks[i] = k.Floats + } + + for i, v := range res.Values { + vs[i] = v.Bytes + } + + return ks, vs, res.Similarities, nil +} diff --git a/tests/integration/integration_suite_test.go b/tests/integration/integration_suite_test.go new file mode 100644 index 00000000..bbe8b5e0 --- /dev/null +++ b/tests/integration/integration_suite_test.go @@ -0,0 +1,17 @@ +package integration_test + +import ( + "os" + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" +) + +func TestLocalAI(t *testing.T) { + log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr}) + RegisterFailHandler(Fail) + RunSpecs(t, "LocalAI test suite") +} diff --git a/tests/integration/stores_test.go b/tests/integration/stores_test.go new file mode 100644 index 00000000..a4ad4f90 --- /dev/null +++ b/tests/integration/stores_test.go @@ -0,0 +1,228 @@ +package integration_test + +import ( + "context" + "embed" + "math" + "os" + "path/filepath" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" + + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/pkg/assets" + "github.com/go-skynet/LocalAI/pkg/grpc" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/store" +) + +//go:embed backend-assets/* +var backendAssets embed.FS + +var _ = Describe("Integration tests for the stores backend(s) and internal APIs", Label("stores"), func() { + Context("Embedded Store get,set and delete", func() { + var sl *model.ModelLoader + var sc grpc.Backend + var tmpdir string + + BeforeEach(func() { + var err error + + zerolog.SetGlobalLevel(zerolog.DebugLevel) + + tmpdir, err = os.MkdirTemp("", "") + Expect(err).ToNot(HaveOccurred()) + backendAssetsDir := filepath.Join(tmpdir, "backend-assets") + err = os.Mkdir(backendAssetsDir, 0755) + Expect(err).ToNot(HaveOccurred()) + + err = assets.ExtractFiles(backendAssets, backendAssetsDir) + Expect(err).ToNot(HaveOccurred()) + + debug := true + + bc := config.BackendConfig{ + Name: "store test", + Debug: &debug, + Backend: model.LocalStoreBackend, + } + + storeOpts := []model.Option{ + model.WithBackendString(bc.Backend), + model.WithAssetDir(backendAssetsDir), + model.WithModel("test"), + } + + sl = model.NewModelLoader("") + sc, err = sl.BackendLoader(storeOpts...) 
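+ // BackendLoader starts the "local-store" backend (extracted into the
+ // backend-assets dir above) as a separate gRPC process; sc is the
+ // grpc.Backend client that the store.* helpers exercised below wrap.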
+ Expect(err).ToNot(HaveOccurred()) + Expect(sc).ToNot(BeNil()) + }) + + AfterEach(func() { + sl.StopAllGRPC() + err := os.RemoveAll(tmpdir) + Expect(err).ToNot(HaveOccurred()) + }) + + It("should be able to set a key", func() { + err := store.SetSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3}, []byte("test")) + Expect(err).ToNot(HaveOccurred()) + }) + + It("should be able to set keys", func() { + err := store.SetCols(context.Background(), sc, [][]float32{{0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}}, [][]byte{[]byte("test1"), []byte("test2")}) + Expect(err).ToNot(HaveOccurred()) + + err = store.SetCols(context.Background(), sc, [][]float32{{0.7, 0.8, 0.9}, {0.10, 0.11, 0.12}}, [][]byte{[]byte("test3"), []byte("test4")}) + Expect(err).ToNot(HaveOccurred()) + }) + + It("should be able to get a key", func() { + err := store.SetSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3}, []byte("test")) + Expect(err).ToNot(HaveOccurred()) + + val, err := store.GetSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3}) + Expect(err).ToNot(HaveOccurred()) + Expect(val).To(Equal([]byte("test"))) + }) + + It("should be able to get keys", func() { + //set 3 entries + err := store.SetCols(context.Background(), sc, [][]float32{{0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}, {0.7, 0.8, 0.9}}, [][]byte{[]byte("test1"), []byte("test2"), []byte("test3")}) + Expect(err).ToNot(HaveOccurred()) + + //get 3 entries + keys, vals, err := store.GetCols(context.Background(), sc, [][]float32{{0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}, {0.7, 0.8, 0.9}}) + Expect(err).ToNot(HaveOccurred()) + Expect(keys).To(HaveLen(3)) + Expect(vals).To(HaveLen(3)) + for i, k := range keys { + v := vals[i] + + if k[0] == 0.1 && k[1] == 0.2 && k[2] == 0.3 { + Expect(v).To(Equal([]byte("test1"))) + } else if k[0] == 0.4 && k[1] == 0.5 && k[2] == 0.6 { + Expect(v).To(Equal([]byte("test2"))) + } else { + Expect(k).To(Equal([]float32{0.7, 0.8, 0.9})) + Expect(v).To(Equal([]byte("test3"))) + } + } + + //get 2 entries + keys, vals, err = store.GetCols(context.Background(), sc, [][]float32{{0.7, 0.8, 0.9}, {0.1, 0.2, 0.3}}) + Expect(err).ToNot(HaveOccurred()) + Expect(keys).To(HaveLen(2)) + Expect(vals).To(HaveLen(2)) + for i, k := range keys { + v := vals[i] + + if k[0] == 0.1 && k[1] == 0.2 && k[2] == 0.3 { + Expect(v).To(Equal([]byte("test1"))) + } else { + Expect(k).To(Equal([]float32{0.7, 0.8, 0.9})) + Expect(v).To(Equal([]byte("test3"))) + } + } + }) + + It("should be able to delete a key", func() { + err := store.SetSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3}, []byte("test")) + Expect(err).ToNot(HaveOccurred()) + + err = store.DeleteSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3}) + Expect(err).ToNot(HaveOccurred()) + + val, _ := store.GetSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3}) + Expect(val).To(BeNil()) + }) + + It("should be able to delete keys", func() { + //set 3 entries + err := store.SetCols(context.Background(), sc, [][]float32{{0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}, {0.7, 0.8, 0.9}}, [][]byte{[]byte("test1"), []byte("test2"), []byte("test3")}) + Expect(err).ToNot(HaveOccurred()) + + //delete 2 entries + err = store.DeleteCols(context.Background(), sc, [][]float32{{0.1, 0.2, 0.3}, {0.7, 0.8, 0.9}}) + Expect(err).ToNot(HaveOccurred()) + + //get 1 entry + keys, vals, err := store.GetCols(context.Background(), sc, [][]float32{{0.4, 0.5, 0.6}}) + Expect(err).ToNot(HaveOccurred()) + Expect(keys).To(HaveLen(1)) + Expect(vals).To(HaveLen(1)) + Expect(keys[0]).To(Equal([]float32{0.4, 0.5, 0.6})) + 
Expect(vals[0]).To(Equal([]byte("test2"))) + + //get deleted entries + keys, vals, err = store.GetCols(context.Background(), sc, [][]float32{{0.1, 0.2, 0.3}, {0.7, 0.8, 0.9}}) + Expect(err).ToNot(HaveOccurred()) + Expect(keys).To(HaveLen(0)) + Expect(vals).To(HaveLen(0)) + }) + + It("should be able to find smilar keys", func() { + // set 3 vectors that are at varying angles to {0.5, 0.5, 0.5} + err := store.SetCols(context.Background(), sc, [][]float32{{0.5, 0.5, 0.5}, {0.6, 0.6, -0.6}, {0.7, -0.7, -0.7}}, [][]byte{[]byte("test1"), []byte("test2"), []byte("test3")}) + Expect(err).ToNot(HaveOccurred()) + + // find similar keys + keys, vals, sims, err := store.Find(context.Background(), sc, []float32{0.1, 0.3, 0.5}, 2) + Expect(err).ToNot(HaveOccurred()) + Expect(keys).To(HaveLen(2)) + Expect(vals).To(HaveLen(2)) + Expect(sims).To(HaveLen(2)) + + for i, k := range keys { + s := sims[i] + log.Debug().Float32("similarity", s).Msgf("key: %v", k) + } + + Expect(keys[0]).To(Equal([]float32{0.5, 0.5, 0.5})) + Expect(vals[0]).To(Equal([]byte("test1"))) + Expect(keys[1]).To(Equal([]float32{0.6, 0.6, -0.6})) + }) + + It("should be able to find similar normalized keys", func() { + // set 3 vectors that are at varying angles to {0.5, 0.5, 0.5} + keys := [][]float32{{0.1, 0.3, 0.5}, {0.5, 0.5, 0.5}, {0.6, 0.6, -0.6}, {0.7, -0.7, -0.7}} + vals := [][]byte{[]byte("test0"), []byte("test1"), []byte("test2"), []byte("test3")} + // normalize the keys + for i, k := range keys { + norm := float64(0) + for _, x := range k { + norm += float64(x * x) + } + norm = math.Sqrt(norm) + for j, x := range k { + keys[i][j] = x / float32(norm) + } + } + + err := store.SetCols(context.Background(), sc, keys, vals) + Expect(err).ToNot(HaveOccurred()) + + // find similar keys + ks, vals, sims, err := store.Find(context.Background(), sc, keys[0], 3) + Expect(err).ToNot(HaveOccurred()) + Expect(ks).To(HaveLen(3)) + Expect(vals).To(HaveLen(3)) + Expect(sims).To(HaveLen(3)) + + for i, k := range ks { + s := sims[i] + log.Debug().Float32("similarity", s).Msgf("key: %v", k) + } + + Expect(ks[0]).To(Equal(keys[0])) + Expect(vals[0]).To(Equal(vals[0])) + Expect(sims[0]).To(BeNumerically("~", 1, 0.0001)) + Expect(ks[1]).To(Equal(keys[1])) + Expect(vals[1]).To(Equal(vals[1])) + }) + }) +}) From a922119c41b40f3e3b7cfaa0189b25e94e5d9a2c Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 23 Mar 2024 09:23:28 +0100 Subject: [PATCH 0174/2895] :arrow_up: Update ggerganov/llama.cpp (#1881) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 94b5570c..2f50f362 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=d0a71233fbf8ade8ef06ad8e6b81d1d7b254895f +CPPLLAMA_VERSION?=56a00f0a2f48a85376f48b5ce77699df781631ae # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 1f501cc1eff7dabf7ac829697214dd726a538e70 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 23 Mar 2024 10:42:14 +0100 Subject: [PATCH 0175/2895] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c58428f7..b101ee7f 100644 --- a/README.md +++ b/README.md @@ -43,14 +43,14 @@ [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) -- 
All-in-one image: https://github.com/mudler/LocalAI/issues/1855 +- Vector store: https://github.com/mudler/LocalAI/pull/1795 +- All-in-one container image: https://github.com/mudler/LocalAI/issues/1855 - Parallel function calling: https://github.com/mudler/LocalAI/pull/1726 - Upload file API: https://github.com/mudler/LocalAI/pull/1703 - Tools API support: https://github.com/mudler/LocalAI/pull/1715 - LLaVa 1.6: https://github.com/mudler/LocalAI/pull/1714 - ROCm container images: https://github.com/mudler/LocalAI/pull/1595 - Intel GPU support (sycl, transformers, diffusers): https://github.com/mudler/LocalAI/issues/1653 -- Deprecation of old backends: https://github.com/mudler/LocalAI/issues/1651 - Mamba support: https://github.com/mudler/LocalAI/pull/1589 - Start and share models with config file: https://github.com/mudler/LocalAI/pull/1522 - 🐸 Coqui: https://github.com/mudler/LocalAI/pull/1489 From 8495750cb8b147babb67d9111a7d9c692b69e3e1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 23 Mar 2024 15:22:26 +0100 Subject: [PATCH 0176/2895] Update release.yml Signed-off-by: Ettore Di Giacinto --- .github/release.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/release.yml b/.github/release.yml index c86866c5..8c2c11f9 100644 --- a/.github/release.yml +++ b/.github/release.yml @@ -12,13 +12,23 @@ changelog: - title: "Bug fixes :bug:" labels: - bug + - regression - title: Exciting New Features 🎉 labels: - Semver-Minor - enhancement + - ux + - roadmap + - title: 🧠 Models + labels: + - area/ai-model + - title: 📖 Documentation and examples + labels: + - kind/documentation + - examples - title: 👒 Dependencies labels: - dependencies - title: Other Changes labels: - - "*" \ No newline at end of file + - "*" From d9456f2a23e8d3a2250909c27c203e44084fc746 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 23 Mar 2024 15:54:14 +0100 Subject: [PATCH 0177/2895] ci(aio): publish hipblas and Intel GPU images (#1883) Signed-off-by: Ettore Di Giacinto --- .github/workflows/image.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 8e2bbbdd..484e505f 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -114,6 +114,7 @@ jobs: tag-suffix: '-hipblas-ffmpeg' ffmpeg: 'true' image-type: 'extras' + aio: "-aio-gpu-hipblas" base-image: "rocm/dev-ubuntu-22.04:6.0-complete" runs-on: 'arc-runner-set' - build-type: 'hipblas' @@ -132,6 +133,7 @@ jobs: ffmpeg: 'true' image-type: 'extras' runs-on: 'arc-runner-set' + aio: "-aio-gpu-intel-f16" - build-type: 'sycl_f32' platforms: 'linux/amd64' tag-latest: 'false' @@ -140,6 +142,7 @@ jobs: ffmpeg: 'true' image-type: 'extras' runs-on: 'arc-runner-set' + aio: "-aio-gpu-intel-f32" # Core images - build-type: 'sycl_f16' platforms: 'linux/amd64' From 49cec7fd6162d0d997b4fee938c26c1d8a275847 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 23 Mar 2024 16:08:32 +0100 Subject: [PATCH 0178/2895] ci(aio): add latest tag images (#1884) Tangentially also fixes #1868 --- .github/workflows/image.yml | 14 +++++++------- .github/workflows/image_build.yml | 2 ++ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 484e505f..6e93cb9a 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -51,7 +51,7 @@ jobs: base-image: "ubuntu:22.04" - build-type: '' platforms: 'linux/amd64' - tag-latest: 'false' + tag-latest: 'auto' tag-suffix: 
'-ffmpeg' ffmpeg: 'true' image-type: 'extras' @@ -81,7 +81,7 @@ jobs: cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' - tag-latest: 'false' + tag-latest: 'auto' tag-suffix: '-cublas-cuda11-ffmpeg' ffmpeg: 'true' image-type: 'extras' @@ -92,7 +92,7 @@ jobs: cuda-major-version: "12" cuda-minor-version: "1" platforms: 'linux/amd64' - tag-latest: 'false' + tag-latest: 'auto' tag-suffix: '-cublas-cuda12-ffmpeg' ffmpeg: 'true' image-type: 'extras' @@ -110,7 +110,7 @@ jobs: runs-on: 'arc-runner-set' - build-type: 'hipblas' platforms: 'linux/amd64' - tag-latest: 'false' + tag-latest: 'auto' tag-suffix: '-hipblas-ffmpeg' ffmpeg: 'true' image-type: 'extras' @@ -127,7 +127,7 @@ jobs: runs-on: 'arc-runner-set' - build-type: 'sycl_f16' platforms: 'linux/amd64' - tag-latest: 'false' + tag-latest: 'auto' base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" tag-suffix: '-sycl-f16-ffmpeg' ffmpeg: 'true' @@ -136,7 +136,7 @@ jobs: aio: "-aio-gpu-intel-f16" - build-type: 'sycl_f32' platforms: 'linux/amd64' - tag-latest: 'false' + tag-latest: 'auto' base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" tag-suffix: '-sycl-f32-ffmpeg' ffmpeg: 'true' @@ -218,7 +218,7 @@ jobs: include: - build-type: '' platforms: 'linux/amd64' - tag-latest: 'false' + tag-latest: 'auto' tag-suffix: '-ffmpeg-core' ffmpeg: 'true' image-type: 'core' diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 8f1386c6..659f85de 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -145,6 +145,7 @@ jobs: type=ref,event=branch type=semver,pattern={{raw}} flavor: | + latest=${{ inputs.tag-latest }} suffix=${{ inputs.aio }} - name: Docker meta AIO (dockerhub) if: inputs.aio != '' @@ -157,6 +158,7 @@ jobs: type=ref,event=branch type=semver,pattern={{raw}} flavor: | + latest=${{ inputs.tag-latest }} suffix=${{ inputs.aio }} - name: Set up QEMU uses: docker/setup-qemu-action@master From bd25d8049c611b5ee5329cdcdc2eb0c6317f29bb Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 23 Mar 2024 16:19:57 +0100 Subject: [PATCH 0179/2895] fix(watchdog): use ShutdownModel instead of StopModel (#1882) Fixes #1760 --- pkg/model/loader.go | 4 ++-- pkg/model/watchdog.go | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pkg/model/loader.go b/pkg/model/loader.go index c2c9df0e..003d8327 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -155,10 +155,10 @@ func (ml *ModelLoader) ShutdownModel(modelName string) error { ml.mu.Lock() defer ml.mu.Unlock() - return ml.StopModel(modelName) + return ml.stopModel(modelName) } -func (ml *ModelLoader) StopModel(modelName string) error { +func (ml *ModelLoader) stopModel(modelName string) error { defer ml.deleteProcess(modelName) if _, ok := ml.models[modelName]; !ok { return fmt.Errorf("model %s not found", modelName) diff --git a/pkg/model/watchdog.go b/pkg/model/watchdog.go index cf313180..c93cb99a 100644 --- a/pkg/model/watchdog.go +++ b/pkg/model/watchdog.go @@ -30,7 +30,7 @@ type WatchDog struct { } type ProcessManager interface { - StopModel(modelName string) error + ShutdownModel(modelName string) error } func NewWatchDog(pm ProcessManager, timeoutBusy, timeoutIdle time.Duration, busy, idle bool) *WatchDog { @@ -112,9 +112,10 @@ func (wd *WatchDog) checkIdle() { log.Warn().Msgf("[WatchDog] Address %s is idle for too long, killing it", address) p, ok := wd.addressModelMap[address] if ok { - if err := wd.pm.StopModel(p); err != nil { + if err := 
wd.pm.ShutdownModel(p); err != nil { log.Error().Msgf("[watchdog] Error shutting down model %s: %v", p, err) } + log.Debug().Msgf("[WatchDog] model shut down: %s", address) delete(wd.idleTime, address) delete(wd.addressModelMap, address) delete(wd.addressMap, address) @@ -139,9 +140,10 @@ func (wd *WatchDog) checkBusy() { model, ok := wd.addressModelMap[address] if ok { log.Warn().Msgf("[WatchDog] Model %s is busy for too long, killing it", model) - if err := wd.pm.StopModel(model); err != nil { + if err := wd.pm.ShutdownModel(model); err != nil { log.Error().Msgf("[watchdog] Error shutting down model %s: %v", model, err) } + log.Debug().Msgf("[WatchDog] model shut down: %s", address) delete(wd.timetable, address) delete(wd.addressModelMap, address) delete(wd.addressMap, address) @@ -149,7 +151,6 @@ func (wd *WatchDog) checkBusy() { log.Warn().Msgf("[WatchDog] Address %s unresolvable", address) delete(wd.timetable, address) } - } } } From 0106c5818186bbb24298d53df7a88db1822d38f4 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 24 Mar 2024 14:54:01 +0100 Subject: [PATCH 0180/2895] :arrow_up: Update ggerganov/llama.cpp (#1885) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2f50f362..adb3c813 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=56a00f0a2f48a85376f48b5ce77699df781631ae +CPPLLAMA_VERSION?=95562175f83a49755ff6fd3bad09409417c8e6f9 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 3e293f14659d7228216a20897e3d2d695909a86a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 24 Mar 2024 22:12:18 +0100 Subject: [PATCH 0181/2895] :arrow_up: Update ggerganov/llama.cpp (#1889) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index adb3c813..c12ea8c0 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=95562175f83a49755ff6fd3bad09409417c8e6f9 +CPPLLAMA_VERSION?=a0e584defd8c16e7a51ab895f595df0448d710d0 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 6cf99527f8aa3c057bef68ce57809dcacfb15612 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 25 Mar 2024 03:01:30 +0100 Subject: [PATCH 0182/2895] docs(aio): Add All-in-One images docs (#1887) * docs(aio): Add AIO images docs * add image generation link to quickstart * while reviewing I noticed this one link was missing, so quickly adding it. 
Signed-off-by: Dave Co-authored-by: Dave --- Dockerfile | 3 +- docs/content/docs/getting-started/build.md | 33 ++++--- .../docs/getting-started/quickstart.md | 98 ++++++++++++++++--- 3 files changed, 107 insertions(+), 27 deletions(-) diff --git a/Dockerfile b/Dockerfile index b083690e..8725e76d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -239,6 +239,7 @@ RUN mkdir -p /build/models # Define the health check command HEALTHCHECK --interval=1m --timeout=10m --retries=10 \ CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1 - + +VOLUME /build/models EXPOSE 8080 ENTRYPOINT [ "/build/entrypoint.sh" ] diff --git a/docs/content/docs/getting-started/build.md b/docs/content/docs/getting-started/build.md index 238bdbec..8ceaf1f5 100644 --- a/docs/content/docs/getting-started/build.md +++ b/docs/content/docs/getting-started/build.md @@ -15,19 +15,7 @@ LocalAI's extensible architecture allows you to add your own backends, which can In some cases you might want to re-build LocalAI from source (for instance to leverage Apple Silicon acceleration), or to build a custom container image with your own backends. This section contains instructions on how to build LocalAI from source. -#### Container image -Requirements: - -- Docker or podman, or a container engine - -In order to build the `LocalAI` container image locally you can use `docker`, for example: - -``` -# build the image -docker build -t localai . -docker run localai -``` #### Build LocalAI locally @@ -111,6 +99,27 @@ docker run --rm -ti -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS {{% /alert %}} +#### Container image + +Requirements: + +- Docker or podman, or a container engine + +In order to build the `LocalAI` container image locally you can use `docker`, for example: + +``` +# build the image +docker build -t localai . +docker run localai +``` + +There are some build arguments that can be used to customize the build: + +| Variable | Default | Description | +| ---------------------| ------- | ----------- | +| `IMAGE_TYPE` | `extras` | Build type. Available: `core`, `extras` | + + ### Example: Build on mac Building on Mac (M1, M2 or M3) works, but you may need to install some prerequisites using `brew`. diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md index e786d6d8..b5fd65d3 100644 --- a/docs/content/docs/getting-started/quickstart.md +++ b/docs/content/docs/getting-started/quickstart.md @@ -10,17 +10,8 @@ icon = "rocket_launch" **LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run [LLMs]({{%relref "docs/features/text-generation" %}}), generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families and architectures. -## Installation Methods - LocalAI is available as a container image and binary, compatible with various container engines like Docker, Podman, and Kubernetes. Container images are published on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest) and [Docker Hub](https://hub.docker.com/r/localai/localai). Binaries can be downloaded from [GitHub](https://github.com/mudler/LocalAI/releases). - -{{% alert icon="💡" %}} - -**Hardware Requirements:** The hardware requirements for LocalAI vary based on the model size and quantization method used. 
For performance benchmarks with different backends, such as `llama.cpp`, visit [this link](https://github.com/ggerganov/llama.cpp#memorydisk-requirements). The `rwkv` backend is noted for its lower resource consumption. + +{{% /alert %}} + +## Running LocalAI with All-in-One (AIO) Images + +LocalAI's All-in-One (AIO) images are pre-configured with a set of models and backends to fully leverage almost all of the LocalAI feature set. + +These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and require no configuration. + +It is suggested to use the AIO images if you don't want to configure the models to run on LocalAI. If you want to run specific models, you can use the [manual method]({{%relref "docs/getting-started/manual" %}}). + +The AIO images come pre-configured with the following features: +- Text to Speech (TTS) +- Speech to Text +- Function calling +- Large Language Models (LLM) for text generation +- Image generation +- Embedding server + + +Start the image with Docker: + +```bash +docker run -p 8080:8080 --name local-ai -ti localai/localai:{{< version >}}-aio-cpu +# For Nvidia GPUs: +# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:{{< version >}}-aio-gpu-cuda-11 +# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:{{< version >}}-aio-gpu-cuda-12 +``` + + +Or with a docker-compose file: + +```yaml +version: "3.9" +services: + api: + image: localai/localai:{{< version >}}-aio-cpu + # For Nvidia GPUs uncomment one of the following (cuda11 or cuda12): + # image: localai/localai:{{< version >}}-aio-gpu-cuda-11 + # image: localai/localai:{{< version >}}-aio-gpu-cuda-12 + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"] + interval: 1m + timeout: 120m + retries: 120 + ports: + - 8080:8080 + environment: + - DEBUG=true + # ... + volumes: + - ./models:/build/models:cached + # uncomment the following piece if running with Nvidia GPUs + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # count: 1 + # capabilities: [gpu] +``` ## Running Models > _Do you already have a model file? Skip to [Run models manually]({{%relref "docs/getting-started/manual" %}})_. -LocalAI allows one-click runs with popular models. It downloads the model and starts the API with the model loaded. +To load models into LocalAI, you can either [use models manually]({{%relref "docs/getting-started/manual" %}}) or configure LocalAI to pull the models from external sources, like Huggingface, and configure the model.
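Once one of the containers above is running, a quick way to confirm the API is up and see which models it exposes is to poll the `/readyz` health endpoint (the same one the docker-compose healthcheck uses) together with the OpenAI-style `/v1/models` listing. A minimal Go sketch, assuming LocalAI is reachable on `localhost:8080` as in the examples above:

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// /readyz is the endpoint polled by the docker-compose healthcheck above;
	// /v1/models lists the models the running instance currently exposes.
	for _, url := range []string{
		"http://localhost:8080/readyz",
		"http://localhost:8080/v1/models",
	} {
		resp, err := http.Get(url)
		if err != nil {
			fmt.Println(url, "error:", err)
			continue
		}
		body, _ := io.ReadAll(resp.Body)
		resp.Body.Close()
		fmt.Printf("%s -> HTTP %d\n%s\n", url, resp.StatusCode, body)
	}
}
```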
-There are different categories of models: [LLMs]({{%relref "docs/features/text-generation" %}}), [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) , [Embeddings]({{%relref "docs/features/embeddings" %}}), [Audio to Text]({{%relref "docs/features/audio-to-text" %}}), and [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) depending on the backend being used and the model architecture. +To do that, you can point LocalAI to an URL to a YAML configuration file - however - LocalAI does also have some popular model configuration embedded in the binary as well. Below you can find a list of the models configuration that LocalAI has pre-built, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}) on how to configure models from URLs. + +There are different categories of models: [LLMs]({{%relref "docs/features/text-generation" %}}), [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) , [Embeddings]({{%relref "docs/features/embeddings" %}}), [Image Generation]({{%relref "docs/features/image-generation" %}}), [Audio to Text]({{%relref "docs/features/audio-to-text" %}}), and [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) depending on the backend being used and the model architecture. {{% alert icon="💡" %}} @@ -51,7 +108,10 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start | Model | Category | Docker command | | --- | --- | --- | | [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core phi-2``` | -| 🌋 [llava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava``` | +| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bakllava``` | +| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.5``` | +| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-mistral``` | +| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-vicuna``` | | [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mistral-openorca``` | | [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bert-cpp``` | | [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg all-minilm-l6-v2``` | @@ -68,7 +128,9 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start | transformers-tinyllama | [LLM]({{%relref 
"docs/features/text-generation" %}}) | GPU-only | | [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) (with transformers) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only | | [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) (with llama.cpp) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core codellama-7b-gguf``` | +| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core hermes-2-pro-mistral``` | {{% /tab %}} + {{% tab tabName="GPU (CUDA 11)" %}} @@ -77,7 +139,10 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start | Model | Category | Docker command | | --- | --- | --- | | [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core phi-2``` | -| 🌋 [llava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core llava``` | +| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bakllava``` | +| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.5``` | +| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-mistral``` | +| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-vicuna``` | | [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mistral-openorca``` | | [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bert-cpp``` | | [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 all-minilm-l6-v2``` | @@ -94,6 +159,7 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start | transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 transformers-tinyllama``` | | [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 codellama-7b``` | | 
[codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core codellama-7b-gguf``` | +| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core hermes-2-pro-mistral``` | {{% /tab %}} @@ -104,7 +170,10 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start | Model | Category | Docker command | | --- | --- | --- | | [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core phi-2``` | -| 🌋 [llava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core llava``` | +| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bakllava``` | +| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.5``` | +| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-mistral``` | +| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-vicuna``` | | [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mistral-openorca``` | | [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bert-cpp``` | | [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 all-minilm-l6-v2``` | @@ -121,6 +190,7 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start | transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 transformers-tinyllama``` | | [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 codellama-7b``` | | [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core codellama-7b-gguf``` | +| 
[hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core hermes-2-pro-mistral``` | {{% /tab %}} {{< /tabs >}} From 5e1238252458a548cba197f7c1e88fac44d6f3e7 Mon Sep 17 00:00:00 2001 From: Enrico Ros Date: Mon, 25 Mar 2024 00:32:40 -0700 Subject: [PATCH 0183/2895] NVIDIA GPU detection support for WSL2 environments (#1891) This change makes the assumption that "Microsoft Corporation Device 008e" is an NVIDIA CUDA device. If this is not the case, please update the hardware detection script here. Signed-off-by: Enrico Ros Co-authored-by: Dave --- aio/entrypoint.sh | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh index b2f64f63..aeb5e4de 100755 --- a/aio/entrypoint.sh +++ b/aio/entrypoint.sh @@ -33,6 +33,17 @@ function detect_gpu() { else echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available." fi + elif lspci | grep -E 'VGA|3D' | grep -iq "Microsoft Corporation Device 008e"; then + # We make the assumption this WSL2 cars is NVIDIA, then check for nvidia-smi + # Make sure the container was run with `--gpus all` as the only required parameter + echo "NVIDIA GPU detected via WSL2" + # nvidia-smi should be installed in the container + if nvidia-smi; then + GPU_ACCELERATION=true + GPU_VENDOR=nvidia + else + echo "NVIDIA GPU detected via WSL2, but nvidia-smi is not installed. GPU acceleration will not be available." + fi fi ;; Darwin) @@ -95,4 +106,4 @@ check_vars echo "Starting LocalAI with the following models: $MODELS" -/build/entrypoint.sh "$@" \ No newline at end of file +/build/entrypoint.sh "$@" From 08c7b172980d707324ee7545e6f8b5be8dbddf4b Mon Sep 17 00:00:00 2001 From: Enrico Ros Date: Mon, 25 Mar 2024 10:36:18 -0700 Subject: [PATCH 0184/2895] Fix NVIDIA VRAM detection on WSL2 environments (#1894) * NVIDIA VRAM detection on WSL2 environments More robust single NVIDIA GPU memory detection, following the improved NVIDIA WSL2 detection patch yesterday #1891. Tested and working on WSL2, Linux. Signed-off-by: Enrico Ros * Update aio/entrypoint.sh Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Enrico Ros Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- aio/entrypoint.sh | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh index aeb5e4de..795cb86a 100755 --- a/aio/entrypoint.sh +++ b/aio/entrypoint.sh @@ -57,29 +57,33 @@ function detect_gpu() { } function detect_gpu_size() { - if [ "$GPU_ACCELERATION" = true ]; then - GPU_SIZE=gpu-8g - fi - # Attempting to find GPU memory size for NVIDIA GPUs - if echo "$gpu_model" | grep -iq nvidia; then + if [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "nvidia" ]; then echo "NVIDIA GPU detected. Attempting to find memory size..." - nvidia_sm=($(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits)) + # Using head -n 1 to get the total memory of the 1st NVIDIA GPU detected. + # If handling multiple GPUs is required in the future, this is the place to do it + nvidia_sm=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | head -n 1) if [ ! 
-z "$nvidia_sm" ]; then - echo "Total GPU Memory: ${nvidia_sm[0]} MiB" + echo "Total GPU Memory: $nvidia_sm MiB" + # if bigger than 8GB, use 16GB + #if [ "$nvidia_sm" -gt 8192 ]; then + # GPU_SIZE=gpu-16g + #else + GPU_SIZE=gpu-8g + #fi else - echo "Unable to determine NVIDIA GPU memory size." + echo "Unable to determine NVIDIA GPU memory size. Falling back to CPU." + GPU_SIZE=gpu-8g fi - # if bigger than 8GB, use 16GB - #if [ "$nvidia_sm" -gt 8192 ]; then - # GPU_SIZE=gpu-16g - #fi - else - echo "Non-NVIDIA GPU detected. GPU memory size detection for non-NVIDIA GPUs is not supported in this script." - fi + + # Default to a generic GPU size until we implement GPU size detection for non NVIDIA GPUs + elif [ "$GPU_ACCELERATION" = true ]; then + echo "Non-NVIDIA GPU detected. Specific GPU memory size detection is not implemented." + GPU_SIZE=gpu-8g # default to cpu if GPU_SIZE is not set - if [ -z "$GPU_SIZE" ]; then + else + echo "GPU acceleration is not enabled or supported. Defaulting to CPU." GPU_SIZE=cpu fi } From c9adc5680c1637efb60662e9c5d71c777c59a046 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 25 Mar 2024 22:04:32 +0100 Subject: [PATCH 0185/2895] fix(aio): make image-gen for GPU functional, update docs (#1895) * readme: update quickstart * aio(gpu): fix dreamshaper * tests(aio): allow to run tests also against an endpoint * docs: split content * tests: less verbosity --------- Co-authored-by: Dave --- README.md | 31 +-- aio/gpu-8g/image-gen.yaml | 6 +- .../docs/getting-started/quickstart.md | 192 ++---------------- .../docs/getting-started/run-other-models.md | 126 ++++++++++++ docs/content/docs/overview.md | 48 ++++- docs/content/docs/reference/aio-images.md | 39 ++++ .../docs/reference/container-images.md | 103 ++++++++++ tests/e2e-aio/e2e_suite_test.go | 67 +++--- tests/e2e-aio/e2e_test.go | 2 +- 9 files changed, 380 insertions(+), 234 deletions(-) create mode 100644 docs/content/docs/getting-started/run-other-models.md create mode 100644 docs/content/docs/reference/aio-images.md create mode 100644 docs/content/docs/reference/container-images.md diff --git a/README.md b/README.md index b101ee7f..7ba96ad5 100644 --- a/README.md +++ b/README.md @@ -20,14 +20,14 @@
-[](https://hub.docker.com/r/localai/localai) -[](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest) - -> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/) -> -> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) - -[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai) +
+ +[LocalAI Docker hub](https://hub.docker.com/r/localai/localai) + + +[LocalAI Quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest) + +
@@ -36,8 +36,11 @@ Join LocalAI Discord Community +
-**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. +[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai) + +**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. ## 🔥🔥 Hot topics / Roadmap @@ -67,10 +70,14 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl ## 💻 [Getting started](https://localai.io/basics/getting_started/index.html) -For a detailed step-by-step introduction, refer to the [Getting Started](https://localai.io/basics/getting_started/index.html) guide. For those in a hurry, here's a straightforward one-liner to launch a LocalAI instance with [phi-2](https://huggingface.co/microsoft/phi-2) using `docker`: +For a detailed step-by-step introduction, refer to the [Getting Started](https://localai.io/basics/getting_started/index.html) guide. 
-``` -docker run -ti -p 8080:8080 localai/localai:v2.9.0-ffmpeg-core phi-2 +For those in a hurry, here's a straightforward one-liner to launch a LocalAI AIO(All-in-one) Image using `docker`: + +```bash +docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu +# or, if you have an Nvidia GPU: +# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-cuda12 ``` ## 🚀 [Features](https://localai.io/features/) diff --git a/aio/gpu-8g/image-gen.yaml b/aio/gpu-8g/image-gen.yaml index 9868572f..53994ebb 100644 --- a/aio/gpu-8g/image-gen.yaml +++ b/aio/gpu-8g/image-gen.yaml @@ -1,6 +1,6 @@ name: stablediffusion parameters: - model: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors + model: DreamShaper_8_pruned.safetensors backend: diffusers step: 25 f16: true @@ -11,6 +11,10 @@ diffusers: enable_parameters: "negative_prompt,num_inference_steps" scheduler_type: "k_dpmpp_2m" +download_files: +- filename: DreamShaper_8_pruned.safetensors + uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors + usage: | curl http://localhost:8080/v1/images/generations \ -H "Content-Type: application/json" \ diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md index b5fd65d3..33ec4cfa 100644 --- a/docs/content/docs/getting-started/quickstart.md +++ b/docs/content/docs/getting-started/quickstart.md @@ -28,6 +28,8 @@ Before you begin, ensure you have a container engine installed if you are not us ## Running LocalAI with All-in-One (AIO) Images +> _Do you have already a model file? Skip to [Run models manually]({{%relref "docs/getting-started/manual" %}}) or [Run other models]({{%relref "docs/getting-started/run-other-models" %}}) to use an already-configured model_. + LocalAI's All-in-One (AIO) images are pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and requires no configuration. @@ -46,10 +48,10 @@ The AIO Images comes pre-configured with the following features: Start the image with Docker: ```bash -docker run -p 8080:8080 --name local-ai -ti localai/localai:{{< version >}}-aio-cpu +docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu # For Nvidia GPUs: -# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:{{< version >}}-aio-gpu-cuda-11 -# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:{{< version >}}-aio-gpu-cuda-12 +# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-cuda-11 +# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-cuda-12 ``` @@ -85,185 +87,15 @@ services: # capabilities: [gpu] ``` -## Running Models - -> _Do you have already a model file? Skip to [Run models manually]({{%relref "docs/getting-started/manual" %}})_. - -To load models into LocalAI, you can either [use models manually]({{%relref "docs/getting-started/manual" %}}) or configure LocalAI to pull the models from external sources, like Huggingface and configure the model. - -To do that, you can point LocalAI to an URL to a YAML configuration file - however - LocalAI does also have some popular model configuration embedded in the binary as well. 
Below you can find a list of the models configuration that LocalAI has pre-built, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}) on how to configure models from URLs. - -There are different categories of models: [LLMs]({{%relref "docs/features/text-generation" %}}), [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) , [Embeddings]({{%relref "docs/features/embeddings" %}}), [Image Generation]({{%relref "docs/features/image-generation" %}}), [Audio to Text]({{%relref "docs/features/audio-to-text" %}}), and [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) depending on the backend being used and the model architecture. - -{{% alert icon="💡" %}} - -To customize the models, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}). For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI/tree/master/examples/configurations) and the configurations for the models below is available [here](https://github.com/mudler/LocalAI/tree/master/embedded/models). -{{% /alert %}} - -{{< tabs tabTotal="3" >}} -{{% tab tabName="CPU-only" %}} - -> 💡Don't need GPU acceleration? use the CPU images which are lighter and do not have Nvidia dependencies - -| Model | Category | Docker command | -| --- | --- | --- | -| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core phi-2``` | -| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bakllava``` | -| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.5``` | -| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-mistral``` | -| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-vicuna``` | -| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mistral-openorca``` | -| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bert-cpp``` | -| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg all-minilm-l6-v2``` | -| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core whisper-base``` | -| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core rhasspy-voice-en-us-amy``` | -| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker 
run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg coqui``` | -| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg bark``` | -| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg vall-e-x``` | -| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mixtral-instruct``` | -| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core tinyllama-chat``` | -| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core dolphin-2.5-mixtral-8x7b``` | -| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only | -| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | GPU-only | -| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only | -| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) (with transformers) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only | -| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) (with llama.cpp) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core codellama-7b-gguf``` | -| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core hermes-2-pro-mistral``` | -{{% /tab %}} - -{{% tab tabName="GPU (CUDA 11)" %}} - - -> To know which version of CUDA do you have available, you can check with `nvidia-smi` or `nvcc --version` see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}). 
- -| Model | Category | Docker command | -| --- | --- | --- | -| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core phi-2``` | -| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bakllava``` | -| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.5``` | -| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-mistral``` | -| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-vicuna``` | -| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mistral-openorca``` | -| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bert-cpp``` | -| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 all-minilm-l6-v2``` | -| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core whisper-base``` | -| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core rhasspy-voice-en-us-amy``` | -| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 coqui``` | -| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 bark``` | -| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 vall-e-x``` | -| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mixtral-instruct``` | -| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core tinyllama-chat``` | -| 
[dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core dolphin-2.5-mixtral-8x7b``` | -| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 mamba-chat``` | -| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda11 animagine-xl``` | -| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 transformers-tinyllama``` | -| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 codellama-7b``` | -| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core codellama-7b-gguf``` | -| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core hermes-2-pro-mistral``` | -{{% /tab %}} - - -{{% tab tabName="GPU (CUDA 12)" %}} - -> To know which version of CUDA do you have available, you can check with `nvidia-smi` or `nvcc --version` see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}). 
- -| Model | Category | Docker command | -| --- | --- | --- | -| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core phi-2``` | -| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bakllava``` | -| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.5``` | -| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-mistral``` | -| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-vicuna``` | -| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mistral-openorca``` | -| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bert-cpp``` | -| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 all-minilm-l6-v2``` | -| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core whisper-base``` | -| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core rhasspy-voice-en-us-amy``` | -| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 coqui``` | -| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 bark``` | -| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 vall-e-x``` | -| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mixtral-instruct``` | -| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core tinyllama-chat``` | -| 
[dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core dolphin-2.5-mixtral-8x7b``` | -| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 mamba-chat``` | -| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda12 animagine-xl``` | -| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 transformers-tinyllama``` | -| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 codellama-7b``` | -| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core codellama-7b-gguf``` | -| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core hermes-2-pro-mistral``` | -{{% /tab %}} - -{{< /tabs >}} - -{{% alert icon="💡" %}} -**Tip** You can actually specify multiple models to start an instance with the models loaded, for example to have both llava and phi-2 configured: - -```bash -docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava phi-2 -``` - -{{% /alert %}} - -## Container images - -LocalAI provides a variety of images to support different environments. These images are available on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) and [Docker Hub](https://hub.docker.com/r/localai/localai). - -For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA images, if you don't have a GPU, use the CPU images. If you have AMD or Mac Silicon, see the [build section]({{%relref "docs/getting-started/build" %}}). - -{{% alert icon="💡" %}} - -**Available Images Types**: - -- Images ending with `-core` are smaller images without predownload python dependencies. Use these images if you plan to use `llama.cpp`, `stablediffusion-ncn`, `tinydream` or `rwkv` backends - if you are not sure which one to use, do **not** use these images. -- FFMpeg is **not** included in the default images due to [its licensing](https://www.ffmpeg.org/legal.html). If you need FFMpeg, use the images ending with `-ffmpeg`. Note that `ffmpeg` is needed in case of using `audio-to-text` LocalAI's features. -- If using old and outdated CPUs and no GPUs you might need to set `REBUILD` to `true` as environment variable along with options to disable the flags which your CPU does not support, however note that inference will perform poorly and slow. See also [flagset compatibility]({{%relref "docs/getting-started/build#cpu-flagset-compatibility" %}}). 
- -{{% /alert %}} - -{{< tabs tabTotal="3" >}} -{{% tab tabName="Vanilla / CPU Images" %}} - -| Description | Quay | Docker Hub | -| --- | --- |-----------------------------------------------| -| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master` | `localai/localai:master` | -| Latest tag | `quay.io/go-skynet/local-ai:latest` | `localai/localai:latest` | -| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}` | `localai/localai:{{< version >}}` | -| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg` | `localai/localai:{{< version >}}-ffmpeg` | -| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg-core` | `localai/localai:{{< version >}}-ffmpeg-core` | - -{{% /tab %}} - -{{% tab tabName="GPU Images CUDA 11" %}} - -| Description | Quay | Docker Hub | -| --- | --- |-------------------------------------------------------------| -| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda11` | `localai/localai:master-cublas-cuda11` | -| Latest tag | `quay.io/go-skynet/local-ai:latest-cublas-cuda11` | `localai/localai:latest-cublas-cuda11` | -| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11` | `localai/localai:{{< version >}}-cublas-cuda11` | -| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-ffmpeg` | `localai/localai:{{< version >}}-cublas-cuda11-ffmpeg` | -| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-ffmpeg-core` | `localai/localai:{{< version >}}-cublas-cuda11-ffmpeg-core` | - -{{% /tab %}} - -{{% tab tabName="GPU Images CUDA 12" %}} - -| Description | Quay | Docker Hub | -| --- | --- |-------------------------------------------------------------| -| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda12` | `localai/localai:master-cublas-cuda12` | -| Latest tag | `quay.io/go-skynet/local-ai:latest-cublas-cuda12` | `localai/localai:latest-cublas-cuda12` | -| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12` | `localai/localai:{{< version >}}-cublas-cuda12` | -| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-ffmpeg` | `localai/localai:{{< version >}}-cublas-cuda12-ffmpeg` | -| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-ffmpeg-core` | `localai/localai:{{< version >}}-cublas-cuda12-ffmpeg-core` | - -{{% /tab %}} - -{{< /tabs >}} +For a list of all the container-images available, see [Container images]({{%relref "docs/reference/container-images" %}}). To learn more about All-in-one images instead, see [All-in-one Images]({{%relref "docs/reference/aio-images" %}}). ## What's next? 
 Explore further resources and community contributions:
 
-- [Community How to's](https://io.midori-ai.xyz/howtos/)
-- [Examples](https://github.com/mudler/LocalAI/tree/master/examples#examples)
-
-[![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png)](https://github.com/mudler/LocalAI/tree/master/examples#examples)
+- [Build LocalAI and the container image]({{%relref "docs/getting-started/build" %}})
+- [Run models manually]({{%relref "docs/getting-started/manual" %}})
+- [Run other models]({{%relref "docs/getting-started/run-other-models" %}})
+- [Container images]({{%relref "docs/reference/container-images" %}})
+- [All-in-one Images]({{%relref "docs/reference/aio-images" %}})
+- [Examples](https://github.com/mudler/LocalAI/tree/master/examples#examples)
\ No newline at end of file
diff --git a/docs/content/docs/getting-started/run-other-models.md b/docs/content/docs/getting-started/run-other-models.md
new file mode 100644
index 00000000..4420550d
--- /dev/null
+++ b/docs/content/docs/getting-started/run-other-models.md
@@ -0,0 +1,126 @@
++++
+disableToc = false
+title = "Run other Models"
+weight = 3
+icon = "rocket_launch"
+
++++
+
+## Running other models
+
+> _Do you already have a model file? Skip to [Run models manually]({{%relref "docs/getting-started/manual" %}})_.
+
+To load models into LocalAI, you can either [use models manually]({{%relref "docs/getting-started/manual" %}}) or configure LocalAI to pull and configure the models from external sources, such as Huggingface.
+
+To do that, you can point LocalAI to the URL of a YAML configuration file; however, LocalAI also has some popular model configurations embedded in the binary. Below is a list of the model configurations that LocalAI has pre-built; see [Model customization]({{%relref "docs/getting-started/customize-model" %}}) for how to configure models from URLs.
+
+There are different categories of models: [LLMs]({{%relref "docs/features/text-generation" %}}), [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}), [Embeddings]({{%relref "docs/features/embeddings" %}}), [Audio to Text]({{%relref "docs/features/audio-to-text" %}}), and [Text to Audio]({{%relref "docs/features/text-to-audio" %}}), depending on the backend being used and the model architecture.
+
+{{% alert icon="💡" %}}
+
+To customize the models, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}). For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI/tree/master/examples/configurations); the configurations for the models below are available [here](https://github.com/mudler/LocalAI/tree/master/embedded/models).
+{{% /alert %}}
+
+{{< tabs tabTotal="3" >}}
+{{% tab tabName="CPU-only" %}}
+
+> 💡 Don't need GPU acceleration? Use the CPU images, which are lighter and do not have Nvidia dependencies.
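+
+Once one of the models below is running, a quick way to confirm that the instance is healthy is to list the models its API exposes (a minimal check, assuming the default port mapping):
+
+```bash
+# Lists the models currently exposed by the running LocalAI instance.
+curl http://localhost:8080/v1/models
+```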
+
+| Model | Category | Docker command |
+| --- | --- | --- |
+| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core phi-2``` |
+| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bakllava``` |
+| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.5``` |
+| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-mistral``` |
+| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-vicuna``` |
+| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mistral-openorca``` |
+| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bert-cpp``` |
+| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg all-minilm-l6-v2``` |
+| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core whisper-base``` |
+| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core rhasspy-voice-en-us-amy``` |
+| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg coqui``` |
+| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg bark``` |
+| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg vall-e-x``` |
+| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mixtral-instruct``` |
+| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version
>}}-ffmpeg-core dolphin-2.5-mixtral-8x7b``` | +| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only | +| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | GPU-only | +| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only | +| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) (with transformers) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only | +| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) (with llama.cpp) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core codellama-7b-gguf``` | +| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core hermes-2-pro-mistral``` | +{{% /tab %}} + +{{% tab tabName="GPU (CUDA 11)" %}} + + +> To know which version of CUDA do you have available, you can check with `nvidia-smi` or `nvcc --version` see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}). + +| Model | Category | Docker command | +| --- | --- | --- | +| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core phi-2``` | +| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bakllava``` | +| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.5``` | +| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-mistral``` | +| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-vicuna``` | +| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mistral-openorca``` | +| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bert-cpp``` | +| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 all-minilm-l6-v2``` | +| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core whisper-base``` | +| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core 
rhasspy-voice-en-us-amy``` | +| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 coqui``` | +| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 bark``` | +| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 vall-e-x``` | +| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mixtral-instruct``` | +| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core tinyllama-chat``` | +| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core dolphin-2.5-mixtral-8x7b``` | +| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 mamba-chat``` | +| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda11 animagine-xl``` | +| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 transformers-tinyllama``` | +| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 codellama-7b``` | +| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core codellama-7b-gguf``` | +| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core hermes-2-pro-mistral``` | +{{% /tab %}} + + +{{% tab tabName="GPU (CUDA 12)" %}} + +> To know which version of CUDA do you have available, you can check with `nvidia-smi` or `nvcc --version` see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}). 
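+
+If you are unsure whether the container can actually see your GPU, a quick sanity check (a minimal sketch, assuming the NVIDIA Container Toolkit is installed on the host) is to run `nvidia-smi` from a throwaway container before starting one of the models below:
+
+```bash
+# Should print the driver and CUDA version visible from inside a container;
+# requires the NVIDIA Container Toolkit on the host.
+docker run --rm --gpus all ubuntu:22.04 nvidia-smi
+```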
+ +| Model | Category | Docker command | +| --- | --- | --- | +| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core phi-2``` | +| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bakllava``` | +| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.5``` | +| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-mistral``` | +| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-vicuna``` | +| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mistral-openorca``` | +| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bert-cpp``` | +| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 all-minilm-l6-v2``` | +| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core whisper-base``` | +| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core rhasspy-voice-en-us-amy``` | +| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 coqui``` | +| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 bark``` | +| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 vall-e-x``` | +| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mixtral-instruct``` | +| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core tinyllama-chat``` | +| 
[dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core dolphin-2.5-mixtral-8x7b``` | +| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 mamba-chat``` | +| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda12 animagine-xl``` | +| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 transformers-tinyllama``` | +| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 codellama-7b``` | +| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core codellama-7b-gguf``` | +| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core hermes-2-pro-mistral``` | +{{% /tab %}} + +{{< /tabs >}} + +{{% alert icon="💡" %}} +**Tip** You can actually specify multiple models to start an instance with the models loaded, for example to have both llava and phi-2 configured: + +```bash +docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava phi-2 +``` + +{{% /alert %}} \ No newline at end of file diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md index f78a9be0..40ec9e4f 100644 --- a/docs/content/docs/overview.md +++ b/docs/content/docs/overview.md @@ -31,14 +31,14 @@ icon = "info"

-[](https://hub.docker.com/r/localai/localai)
-[](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest)
-
-> 💡 Get help - [❓FAQ](https://localai.io/faq/) [❓How tos](https://io.midori-ai.xyz/howtos/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [💭Discord](https://discord.gg/uJAeKSAGDy)
->
-> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
-
-**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families and architectures. Does not require GPU. It is maintained by [mudler](https://github.com/mudler).
+
+[LocalAI Docker hub](https://hub.docker.com/r/localai/localai)
+
+[LocalAI Quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest)
+
@@ -47,6 +47,35 @@ icon = "info"
 Join LocalAI Discord Community
+
+
+
+> 💡 Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [💭Discord](https://discord.gg/uJAeKSAGDy)
+>
+> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
+
+
+
+
+**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images and audio (and not only) locally or on-prem with consumer-grade hardware, supporting multiple model families and architectures. It does not require a GPU. It is maintained by [mudler](https://github.com/mudler).
+
+
+## Start LocalAI
+
+Start the image with Docker to have a functional clone of OpenAI! 🚀:
+
+```bash
+docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
+# Do you have an Nvidia GPU? Use one of these instead
+# CUDA 11
+# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-cuda-11
+# CUDA 12
+# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-cuda-12
+```
+
+See the [💻 Quickstart](https://localai.io/basics/getting_started/) for all the options and ways you can run LocalAI!
+
+## What is LocalAI?
 
 In a nutshell:
@@ -61,8 +90,7 @@ LocalAI is focused on making the AI accessible to anyone. Any contribution, feed
 Note that this started just as a fun weekend project by [mudler](https://github.com/mudler) in order to try to create the necessary pieces for a full AI assistant like `ChatGPT`: the community is growing fast and we are working hard to make it better and more stable. If you want to help, please consider contributing (see below)!
 
-
-## 🚀 Features
+### 🚀 Features
 
 - 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
 - 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
diff --git a/docs/content/docs/reference/aio-images.md b/docs/content/docs/reference/aio-images.md
new file mode 100644
index 00000000..9c569fb5
--- /dev/null
+++ b/docs/content/docs/reference/aio-images.md
@@ -0,0 +1,39 @@
+
++++
+disableToc = false
+title = "All-In-One images"
+weight = 26
++++
+
+All-In-One images come pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and require no configuration. Model configurations can be found [here](https://github.com/mudler/LocalAI/tree/master/aio), separated by size.
+
+
+| Description | Quay | Docker Hub |
+| --- | --- |-----------------------------------------------|
+| Latest images for CPU | `quay.io/go-skynet/local-ai:latest-aio-cpu` | `localai/localai:latest-aio-cpu` |
+| Versioned image (e.g. for CPU) | `quay.io/go-skynet/local-ai:{{< version >}}-aio-cpu` | `localai/localai:{{< version >}}-aio-cpu` |
+| Latest images for Nvidia GPU (CUDA11) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-11` | `localai/localai:latest-aio-gpu-nvidia-cuda-11` |
+| Latest images for Nvidia GPU (CUDA12) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12` | `localai/localai:latest-aio-gpu-nvidia-cuda-12` |
+| Latest images for AMD GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-hipblas` | `localai/localai:latest-aio-gpu-hipblas` |
+| Latest images for Intel GPU (sycl f16) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f16` | `localai/localai:latest-aio-gpu-intel-f16` |
+| Latest images for Intel GPU (sycl f32) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f32` | `localai/localai:latest-aio-gpu-intel-f32` |
+
+## Available environment variables
+
+The AIO images inherit the same environment variables as the base images, along with LocalAI's own environment (which you can inspect by calling `--help`). In addition, they support the following environment variables, available only in the container image:
+
+| Variable | Default | Description |
+| ---------------------| ------- | ----------- |
+| `SIZE` | Auto-detected | The size of the models to use. Available: `cpu`, `gpu-8g` |
+| `MODELS` | Auto-detected | A list of model YAML configuration file URIs/URLs (see also [running models]({{%relref "docs/getting-started/run-other-models" %}})) |
+
+
+## Example
+
+Start the image with Docker:
+
+```bash
+docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
+```
+
+LocalAI will automatically download all the required models, and the API will be available at [localhost:8080](http://localhost:8080/v1/models).
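+
+You can combine the environment variables above with the usual Docker flags. For example, to force the `gpu-8g` profile and load an extra model from a YAML configuration URL, something like this should work (a sketch - the URL below is a hypothetical placeholder, not a real configuration):
+
+```bash
+# Force the 8GB-GPU model profile and point MODELS at a custom model
+# configuration; https://example.com/my-model.yaml is a placeholder.
+docker run -p 8080:8080 --gpus all --name local-ai -ti \
+  -e SIZE=gpu-8g \
+  -e MODELS="https://example.com/my-model.yaml" \
+  localai/localai:latest-aio-gpu-nvidia-cuda-12
+```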
diff --git a/docs/content/docs/reference/container-images.md b/docs/content/docs/reference/container-images.md
new file mode 100644
index 00000000..6531dd97
--- /dev/null
+++ b/docs/content/docs/reference/container-images.md
@@ -0,0 +1,103 @@
+
++++
+disableToc = false
+title = "Available Container images"
+weight = 25
++++
+
+LocalAI provides a variety of images to support different environments. These images are available on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) and [Docker Hub](https://hub.docker.com/r/localai/localai).
+
+> _For All-in-One images with a pre-configured set of models and backends, see the [AIO Images]({{%relref "docs/reference/aio-images" %}})._
+
+For GPU acceleration on Nvidia video graphics cards, use the Nvidia/CUDA images; if you don't have a GPU, use the CPU images. If you have an AMD GPU or Apple Silicon, see the [build section]({{%relref "docs/getting-started/build" %}}).
+
+{{% alert icon="💡" %}}
+
+**Available Image Types**:
+
+- Images ending with `-core` are smaller images without pre-downloaded Python dependencies. Use these images if you plan to use the `llama.cpp`, `stablediffusion-ncn`, `tinydream` or `rwkv` backends - if you are not sure which one to use, do **not** use these images.
+- Images containing the `aio` tag are all-in-one images with all the features enabled, and come with an opinionated set of configurations.
+- FFMpeg is **not** included in the default images due to [its licensing](https://www.ffmpeg.org/legal.html). If you need FFMpeg, use the images ending with `-ffmpeg`. Note that `ffmpeg` is required when using LocalAI's `audio-to-text` features.
+- If using old and outdated CPUs and no GPUs you might need to set `REBUILD` to `true` as environment variable along with options to disable the flags which your CPU does not support, however note that inference will perform poorly and slow. See also [flagset compatibility]({{%relref "docs/getting-started/build#cpu-flagset-compatibility" %}}). + +{{% /alert %}} + +{{< tabs tabTotal="6" >}} +{{% tab tabName="Vanilla / CPU Images" %}} + +| Description | Quay | Docker Hub | +| --- | --- |-----------------------------------------------| +| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master` | `localai/localai:master` | +| Latest tag | `quay.io/go-skynet/local-ai:latest` | `localai/localai:latest` | +| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}` | `localai/localai:{{< version >}}` | +| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg` | `localai/localai:{{< version >}}-ffmpeg` | +| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg-core` | `localai/localai:{{< version >}}-ffmpeg-core` | + +{{% /tab %}} + +{{% tab tabName="GPU Images CUDA 11" %}} + +| Description | Quay | Docker Hub | +| --- | --- |-------------------------------------------------------------| +| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda11` | `localai/localai:master-cublas-cuda11` | +| Latest tag | `quay.io/go-skynet/local-ai:latest-cublas-cuda11` | `localai/localai:latest-cublas-cuda11` | +| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11` | `localai/localai:{{< version >}}-cublas-cuda11` | +| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-ffmpeg` | `localai/localai:{{< version >}}-cublas-cuda11-ffmpeg` | +| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-ffmpeg-core` | `localai/localai:{{< version >}}-cublas-cuda11-ffmpeg-core` | + +{{% /tab %}} + +{{% tab tabName="GPU Images CUDA 12" %}} + +| Description | Quay | Docker Hub | +| --- | --- |-------------------------------------------------------------| +| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda12` | `localai/localai:master-cublas-cuda12` | +| Latest tag | `quay.io/go-skynet/local-ai:latest-cublas-cuda12` | `localai/localai:latest-cublas-cuda12` | +| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12` | `localai/localai:{{< version >}}-cublas-cuda12` | +| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-ffmpeg` | `localai/localai:{{< version >}}-cublas-cuda12-ffmpeg` | +| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-ffmpeg-core` | `localai/localai:{{< version >}}-cublas-cuda12-ffmpeg-core` | + +{{% /tab %}} + +{{% tab tabName="Intel GPU (sycl f16)" %}} + +| Description | Quay | Docker Hub | +| --- | --- |-------------------------------------------------------------| +| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f16` | `localai/localai:master-sycl-f16` | +| Latest tag | `quay.io/go-skynet/local-ai:latest-sycl-f16` | `localai/localai:latest-sycl-f16` | +| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16` | `localai/localai:{{< version >}}-sycl-f16` | +| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version 
>}}-sycl-f16-ffmpeg` | `localai/localai:{{< version >}}-sycl-f16-ffmpeg` | +| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg-core` | `localai/localai:{{< version >}}-sycl-f16-ffmpeg-core` | + +{{% /tab %}} + +{{% tab tabName="Intel GPU (sycl f32)" %}} + +| Description | Quay | Docker Hub | +| --- | --- |-------------------------------------------------------------| +| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f32` | `localai/localai:master-sycl-f32` | +| Latest tag | `quay.io/go-skynet/local-ai:latest-sycl-f32` | `localai/localai:latest-sycl-f32` | +| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32` | `localai/localai:{{< version >}}-sycl-f32` | +| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32-ffmpeg` | `localai/localai:{{< version >}}-sycl-f32-ffmpeg` | +| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32-ffmpeg-core` | `localai/localai:{{< version >}}-sycl-f32-ffmpeg-core` | + +{{% /tab %}} + +{{% tab tabName="AMD GPU" %}} + +| Description | Quay | Docker Hub | +| --- | --- |-------------------------------------------------------------| +| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-hipblas` | `localai/localai:master-hipblas` | +| Latest tag | `quay.io/go-skynet/local-ai:latest-hipblas` | `localai/localai:latest-hipblas` | +| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-hipblas` | `localai/localai:{{< version >}}-hipblas` | +| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-hipblas-ffmpeg` | `localai/localai:{{< version >}}-hipblas-ffmpeg` | +| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-hipblas-ffmpeg-core` | `localai/localai:{{< version >}}-hipblas-ffmpeg-core` | + +{{% /tab %}} + +{{< /tabs >}} + +## See Also + +- [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}) +- [AIO Images]({{%relref "docs/reference/aio-images" %}}) \ No newline at end of file diff --git a/tests/e2e-aio/e2e_suite_test.go b/tests/e2e-aio/e2e_suite_test.go index 00fc6d2a..fa61c408 100644 --- a/tests/e2e-aio/e2e_suite_test.go +++ b/tests/e2e-aio/e2e_suite_test.go @@ -22,6 +22,7 @@ var containerImage = os.Getenv("LOCALAI_IMAGE") var containerImageTag = os.Getenv("LOCALAI_IMAGE_TAG") var modelsDir = os.Getenv("LOCALAI_MODELS_DIR") var apiPort = os.Getenv("LOCALAI_API_PORT") +var apiEndpoint = os.Getenv("LOCALAI_API_ENDPOINT") func TestLocalAI(t *testing.T) { RegisterFailHandler(Fail) @@ -30,16 +31,45 @@ func TestLocalAI(t *testing.T) { var _ = BeforeSuite(func() { - if containerImage == "" { - Fail("LOCALAI_IMAGE is not set") - } - if containerImageTag == "" { - Fail("LOCALAI_IMAGE_TAG is not set") - } if apiPort == "" { apiPort = "8080" } + var defaultConfig openai.ClientConfig + if apiEndpoint == "" { + startDockerImage() + defaultConfig = openai.DefaultConfig("") + defaultConfig.BaseURL = "http://localhost:" + apiPort + "/v1" + } else { + fmt.Println("Default ", apiEndpoint) + defaultConfig = openai.DefaultConfig("") + defaultConfig.BaseURL = apiEndpoint + } + + // Wait for API to be ready + client = openai.NewClientWithConfig(defaultConfig) + + Eventually(func() error { + _, err := client.ListModels(context.TODO()) + return err + }, "20m").ShouldNot(HaveOccurred()) +}) + +var _ = AfterSuite(func() { + if resource != nil { + 
Expect(pool.Purge(resource)).To(Succeed()) + } + //dat, err := os.ReadFile(resource.Container.LogPath) + //Expect(err).To(Not(HaveOccurred())) + //Expect(string(dat)).To(ContainSubstring("GRPC Service Ready")) + //fmt.Println(string(dat)) +}) + +var _ = AfterEach(func() { + //Expect(dbClient.Clear()).To(Succeed()) +}) + +func startDockerImage() { p, err := dockertest.NewPool("") Expect(err).To(Not(HaveOccurred())) Expect(p.Client.Ping()).To(Succeed()) @@ -71,27 +101,4 @@ var _ = BeforeSuite(func() { Expect(err).To(Not(HaveOccurred())) resource = r - - defaultConfig := openai.DefaultConfig("") - defaultConfig.BaseURL = "http://localhost:" + apiPort + "/v1" - - // Wait for API to be ready - client = openai.NewClientWithConfig(defaultConfig) - - Eventually(func() error { - _, err := client.ListModels(context.TODO()) - return err - }, "20m").ShouldNot(HaveOccurred()) -}) - -var _ = AfterSuite(func() { - Expect(pool.Purge(resource)).To(Succeed()) - //dat, err := os.ReadFile(resource.Container.LogPath) - //Expect(err).To(Not(HaveOccurred())) - //Expect(string(dat)).To(ContainSubstring("GRPC Service Ready")) - //fmt.Println(string(dat)) -}) - -var _ = AfterEach(func() { - //Expect(dbClient.Clear()).To(Succeed()) -}) +} diff --git a/tests/e2e-aio/e2e_test.go b/tests/e2e-aio/e2e_test.go index 03d9fda9..c52d789e 100644 --- a/tests/e2e-aio/e2e_test.go +++ b/tests/e2e-aio/e2e_test.go @@ -51,7 +51,7 @@ var _ = Describe("E2E test", func() { ) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Data)).To(Equal(1), fmt.Sprint(resp)) - Expect(resp.Data[0].URL).To(ContainSubstring("http://localhost:8080"), fmt.Sprint(resp.Data[0].URL)) + Expect(resp.Data[0].URL).To(ContainSubstring("png"), fmt.Sprint(resp.Data[0].URL)) }) }) Context("embeddings", func() { From 42a4c86dca03baad7597389ca30029ae4c32e7a2 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 26 Mar 2024 00:33:46 +0100 Subject: [PATCH 0186/2895] :arrow_up: Update ggerganov/whisper.cpp (#1896) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c12ea8c0..59477f59 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=fff24a0148fe194df4997a738eeceddd724959c3 +WHISPER_CPP_VERSION?=1558ec5a16cb2b2a0bf54815df1d41f83dc3815b # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From 1395e505cd8f1cc90ce575602c7eb21706da6067 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 26 Mar 2024 00:34:10 +0100 Subject: [PATCH 0187/2895] :arrow_up: Update ggerganov/llama.cpp (#1897) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 59477f59..518287da 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=a0e584defd8c16e7a51ab895f595df0448d710d0 +CPPLLAMA_VERSION?=b06c16ef9f81d84da520232c125d4d8a1d273736 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From e58410fa99996d9927b06d5d1cab0e072486edac Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 26 Mar 2024 18:45:25 +0100 Subject: [PATCH 0188/2895] feat(aio): add intel profile (#1901) * feat(aio): 
add intel profile * docs: clarify AIO images features --- README.md | 4 ++ aio/cpu/embeddings.yaml | 8 +--- aio/cpu/image-gen.yaml | 11 ++++- aio/cpu/vision.yaml | 9 ---- aio/entrypoint.sh | 4 +- aio/gpu-8g/embeddings.yaml | 1 - aio/gpu-8g/image-gen.yaml | 1 - aio/gpu-8g/vision.yaml | 2 - aio/intel/embeddings.yaml | 12 ++++++ aio/intel/image-gen.yaml | 20 +++++++++ aio/intel/speech-to-text.yaml | 18 ++++++++ aio/intel/text-to-speech.yaml | 15 +++++++ aio/intel/text-to-text.yaml | 51 +++++++++++++++++++++++ aio/intel/vision.yaml | 35 ++++++++++++++++ docs/content/docs/overview.md | 1 - docs/content/docs/reference/aio-images.md | 9 ++++ 16 files changed, 178 insertions(+), 23 deletions(-) create mode 100644 aio/intel/embeddings.yaml create mode 100644 aio/intel/image-gen.yaml create mode 100644 aio/intel/speech-to-text.yaml create mode 100644 aio/intel/text-to-speech.yaml create mode 100644 aio/intel/text-to-text.yaml create mode 100644 aio/intel/vision.yaml diff --git a/README.md b/README.md index 7ba96ad5..8cf15d5a 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,10 @@

+> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/) +> +> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) + [![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai) **LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. diff --git a/aio/cpu/embeddings.yaml b/aio/cpu/embeddings.yaml index bdee079c..8576746f 100644 --- a/aio/cpu/embeddings.yaml +++ b/aio/cpu/embeddings.yaml @@ -1,11 +1,5 @@ -backend: bert-embeddings -embeddings: true -f16: true - -gpu_layers: 90 -mmap: true name: text-embedding-ada-002 - +backend: bert-embeddings parameters: model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin diff --git a/aio/cpu/image-gen.yaml b/aio/cpu/image-gen.yaml index 3b9c2eec..9de88a3f 100644 --- a/aio/cpu/image-gen.yaml +++ b/aio/cpu/image-gen.yaml @@ -50,4 +50,13 @@ download_files: uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin" - filename: "stablediffusion_assets/vocab.txt" sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt" \ No newline at end of file + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt" + +usage: | + curl http://localhost:8080/v1/images/generations \ + -H "Content-Type: application/json" \ + -d '{ + "prompt": "|", + "step": 25, + "size": "512x512" + }' \ No newline at end of file diff --git a/aio/cpu/vision.yaml b/aio/cpu/vision.yaml index 0777f715..3b466d37 100644 --- a/aio/cpu/vision.yaml +++ b/aio/cpu/vision.yaml @@ -1,8 +1,6 @@ backend: llama-cpp context_size: 4096 f16: true - -gpu_layers: 90 mmap: true name: gpt-4-vision-preview @@ -14,13 +12,6 @@ roles: mmproj: bakllava-mmproj.gguf parameters: model: bakllava.gguf - temperature: 0.2 - top_k: 40 - top_p: 0.95 - seed: -1 -mirostat: 2 -mirostat_eta: 1.0 -mirostat_tau: 1.0 template: chat: | diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh index 795cb86a..d04e5642 100755 --- 
a/aio/entrypoint.sh +++ b/aio/entrypoint.sh @@ -30,6 +30,7 @@ function detect_gpu() { echo "Intel GPU detected" if [ -d /opt/intel ]; then GPU_ACCELERATION=true + GPU_VENDOR=intel else echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available." fi @@ -75,7 +76,8 @@ function detect_gpu_size() { echo "Unable to determine NVIDIA GPU memory size. Falling back to CPU." GPU_SIZE=gpu-8g fi - + elif [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "intel" ]; then + GPU_SIZE=intel # Default to a generic GPU size until we implement GPU size detection for non-NVIDIA GPUs elif [ "$GPU_ACCELERATION" = true ]; then echo "Non-NVIDIA GPU detected. Specific GPU memory size detection is not implemented." diff --git a/aio/gpu-8g/embeddings.yaml b/aio/gpu-8g/embeddings.yaml index 98b519d5..99a74ef7 100644 --- a/aio/gpu-8g/embeddings.yaml +++ b/aio/gpu-8g/embeddings.yaml @@ -1,6 +1,5 @@ name: text-embedding-ada-002 backend: sentencetransformers -embeddings: true parameters: model: all-MiniLM-L6-v2 diff --git a/aio/gpu-8g/image-gen.yaml b/aio/gpu-8g/image-gen.yaml index 53994ebb..0074aaf0 100644 --- a/aio/gpu-8g/image-gen.yaml +++ b/aio/gpu-8g/image-gen.yaml @@ -20,7 +20,6 @@ usage: | -H "Content-Type: application/json" \ -d '{ "prompt": "|", - "model": "dreamshaper", "step": 25, "size": "512x512" }' \ No newline at end of file diff --git a/aio/gpu-8g/vision.yaml b/aio/gpu-8g/vision.yaml index 02542503..db039279 100644 --- a/aio/gpu-8g/vision.yaml +++ b/aio/gpu-8g/vision.yaml @@ -1,8 +1,6 @@ backend: llama-cpp context_size: 4096 f16: true - -gpu_layers: 90 mmap: true name: gpt-4-vision-preview diff --git a/aio/intel/embeddings.yaml b/aio/intel/embeddings.yaml new file mode 100644 index 00000000..99a74ef7 --- /dev/null +++ b/aio/intel/embeddings.yaml @@ -0,0 +1,12 @@ +name: text-embedding-ada-002 +backend: sentencetransformers +parameters: + model: all-MiniLM-L6-v2 + +usage: | + You can test this model with curl like this: + + curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{ + "input": "Your text string goes here", + "model": "text-embedding-ada-002" + }' \ No newline at end of file diff --git a/aio/intel/image-gen.yaml b/aio/intel/image-gen.yaml new file mode 100644 index 00000000..eb724c92 --- /dev/null +++ b/aio/intel/image-gen.yaml @@ -0,0 +1,20 @@ +name: stablediffusion +parameters: + model: runwayml/stable-diffusion-v1-5 +backend: diffusers +step: 25 +f16: true +diffusers: + pipeline_type: StableDiffusionPipeline + cuda: true + enable_parameters: "negative_prompt,num_inference_steps" + scheduler_type: "k_dpmpp_2m" + +usage: | + curl http://localhost:8080/v1/images/generations \ + -H "Content-Type: application/json" \ + -d '{ + "prompt": "|", + "step": 25, + "size": "512x512" + }' \ No newline at end of file diff --git a/aio/intel/speech-to-text.yaml b/aio/intel/speech-to-text.yaml new file mode 100644 index 00000000..77850d79 --- /dev/null +++ b/aio/intel/speech-to-text.yaml @@ -0,0 +1,18 @@ +name: whisper-1 +backend: whisper +parameters: + model: ggml-whisper-base.bin + +usage: | + ## example audio file + wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg + + ## Send the example audio file to the transcriptions endpoint + curl http://localhost:8080/v1/audio/transcriptions \ + -H "Content-Type: multipart/form-data" \ + -F file="@$PWD/gb1.ogg" -F model="whisper-1" + +download_files: +- filename: "ggml-whisper-base.bin" + sha256:
"60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" + uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" \ No newline at end of file diff --git a/aio/intel/text-to-speech.yaml b/aio/intel/text-to-speech.yaml new file mode 100644 index 00000000..8d875a29 --- /dev/null +++ b/aio/intel/text-to-speech.yaml @@ -0,0 +1,15 @@ +name: tts-1 +download_files: + - filename: voice-en-us-amy-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz + +parameters: + model: en-us-amy-low.onnx + +usage: | + To test if this model works as expected, you can use the following curl command: + + curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ + "model":"tts-1", + "input": "Hi, this is a test." + }' \ No newline at end of file diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml new file mode 100644 index 00000000..ef36b562 --- /dev/null +++ b/aio/intel/text-to-text.yaml @@ -0,0 +1,51 @@ +name: gpt-4 +mmap: false +f16: false +parameters: + model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf + +roles: + assistant_function_call: assistant + function: tool +template: + chat_message: | + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}} + {{ if eq .RoleName "assistant_function_call" }}{{end}} + {{ if eq .RoleName "function" }}{{end}} + {{if .Content}}{{.Content}}{{end}} + {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}} + {{ if eq .RoleName "assistant_function_call" }}{{end}} + {{ if eq .RoleName "function" }}{{end}} + <|im_end|> + # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling + function: | + <|im_start|>system + You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: + <tools> + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + </tools> + Use the following pydantic model json schema for each tool call you will make: + {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} + For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows: + <tool_call> + {'arguments': <args-dict>, 'name': <function-name>} + </tool_call><|im_end|> + {{.Input}} + <|im_start|>assistant + + chat: | + {{.Input}} + <|im_start|>assistant + completion: | + {{.Input}} +context_size: 4096 +stopwords: +- <|im_end|> +- <dummy32000> +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "gpt-4", + "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] + }' diff --git a/aio/intel/vision.yaml b/aio/intel/vision.yaml new file mode 100644 index 00000000..52843162 --- /dev/null +++ b/aio/intel/vision.yaml @@ -0,0 +1,35 @@ +backend: llama-cpp +context_size: 4096 +mmap: false +f16: false +name: gpt-4-vision-preview + +roles: + user: "USER:" + assistant: "ASSISTANT:" + system: "SYSTEM:" + +mmproj: llava-v1.6-7b-mmproj-f16.gguf +parameters: + model: llava-v1.6-mistral-7b.Q5_K_M.gguf + temperature: 0.2 + top_k: 40 + top_p: 0.95 + seed: -1 + +template: + chat: | + A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. + {{.Input}} + ASSISTANT: + +download_files: +- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf + uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf +- filename: llava-v1.6-7b-mmproj-f16.gguf + uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf + +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "gpt-4-vision-preview", + "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md index 40ec9e4f..3c3a397d 100644 --- a/docs/content/docs/overview.md +++ b/docs/content/docs/overview.md @@ -49,7 +49,6 @@ icon = "info"
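The usage: snippets in the configs above are curl one-liners. For readers wiring an AIO image into an application, an equivalent minimal Go client for the same chat endpoint could look like the sketch below; the URL, model name, and message mirror the usage: block of aio/intel/text-to-text.yaml, everything else is illustrative.

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Same request body as the curl example above (temperature omitted for brevity).
	payload := []byte(`{"model": "gpt-4", "messages": [{"role": "user", "content": "How are you doing?"}]}`)

	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body)) // prints the OpenAI-compatible chat completion JSON
}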

- > 💡 Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [💭Discord](https://discord.gg/uJAeKSAGDy) > > [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) diff --git a/docs/content/docs/reference/aio-images.md b/docs/content/docs/reference/aio-images.md index 9c569fb5..331892e9 100644 --- a/docs/content/docs/reference/aio-images.md +++ b/docs/content/docs/reference/aio-images.md @@ -7,6 +7,15 @@ weight = 26 All-In-One images are images that come pre-configured with a set of models and backends to fully leverage almost all of the LocalAI feature set. These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and require no configuration. Model configurations can be found [here](https://github.com/mudler/LocalAI/tree/master/aio) separated by size. +What you can find configured out of the box: + +- Image generation +- Text generation +- Text to audio +- Audio transcription +- Embeddings +- GPT Vision + | Description | Quay | Docker Hub | | --- | --- |-----------------------------------------------| From b7ffe6621962952e2a69a6caeb6224f00bcf377d Mon Sep 17 00:00:00 2001 From: "Sebastian.W" Date: Wed, 27 Mar 2024 01:48:14 +0800 Subject: [PATCH 0189/2895] Enhance autogptq backend to support VL models (#1860) * Enhance autogptq backend to support VL models * update dependencies for autogptq * remove redundant auto-gptq dependency * Convert base64 to image_url for Qwen-VL model * implemented model inference for qwen-vl * remove user prompt from generated answer * fixed write image error --------- Co-authored-by: Binghua Wu --- backend/python/autogptq/autogptq.py | 56 ++++++++++++++++--- backend/python/autogptq/autogptq.yml | 13 ++++- .../transformers/transformers-nvidia.yml | 9 ++- .../transformers/transformers-rocm.yml | 6 +- .../common-env/transformers/transformers.yml | 9 ++- 5 files changed, 75 insertions(+), 18 deletions(-) diff --git a/backend/python/autogptq/autogptq.py b/backend/python/autogptq/autogptq.py index ffb37569..bbafdd92 100755 --- a/backend/python/autogptq/autogptq.py +++ b/backend/python/autogptq/autogptq.py @@ -5,12 +5,14 @@ import signal import sys import os import time +import base64 import grpc import backend_pb2 import backend_pb2_grpc + from auto_gptq import AutoGPTQForCausalLM -from transformers import AutoTokenizer +from transformers import AutoTokenizer, AutoModelForCausalLM from transformers import TextGenerationPipeline _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -28,9 +30,19 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if request.Device != "": device = request.Device - tokenizer = AutoTokenizer.from_pretrained(request.Model, use_fast=request.UseFastTokenizer) + # support loading local model files + model_path = os.path.join(os.environ.get('MODELS_PATH', './'), request.Model) + tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True, trust_remote_code=request.TrustRemoteCode) - model = AutoGPTQForCausalLM.from_quantized(request.Model, + # support model `Qwen/Qwen-VL-Chat-Int4` + if "qwen-vl" in request.Model.lower(): + self.model_name = "Qwen-VL-Chat" + model = AutoModelForCausalLM.from_pretrained(model_path, + trust_remote_code=request.TrustRemoteCode, + use_triton=request.UseTriton, +
device_map="auto").eval() + else: + model = AutoGPTQForCausalLM.from_quantized(model_path, model_basename=request.ModelBaseName, use_safetensors=True, trust_remote_code=request.TrustRemoteCode, @@ -55,6 +67,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if request.TopP != 0.0: top_p = request.TopP + + prompt_images = self.recompile_vl_prompt(request) + compiled_prompt = prompt_images[0] + print(f"Prompt: {compiled_prompt}", file=sys.stderr) + # Implement Predict RPC pipeline = TextGenerationPipeline( model=self.model, @@ -64,10 +81,17 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): top_p=top_p, repetition_penalty=penalty, ) - t = pipeline(request.Prompt)[0]["generated_text"] - # Remove prompt from response if present - if request.Prompt in t: - t = t.replace(request.Prompt, "") + t = pipeline(compiled_prompt)[0]["generated_text"] + print(f"generated_text: {t}", file=sys.stderr) + + if compiled_prompt in t: + t = t.replace(compiled_prompt, "") + # housekeeping: remove the image files from the /tmp folder + for img_path in prompt_images[1]: + try: + os.remove(img_path) + except Exception as e: + print(f"Error removing image file: {img_path}, {e}", file=sys.stderr) return backend_pb2.Result(message=bytes(t, encoding='utf-8')) @@ -78,6 +102,24 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): # Not implemented yet return self.Predict(request, context) + def recompile_vl_prompt(self, request): + prompt = request.Prompt + image_paths = [] + + if "qwen-vl" in getattr(self, "model_name", "").lower(): # model_name is only set when a Qwen-VL model was loaded + # request.Images is an array which contains base64 encoded images. Iterate the request.Images array, decode and save each image to the /tmp folder with a timestamp-based filename. + # Then, save the image file paths to an array "image_paths". + # Read "request.Prompt", replace "[img-%d]" with the image file paths in the order they appear in "image_paths". Save the new prompt to "prompt". + for i, img in enumerate(request.Images): + timestamp = str(int(time.time() * 1000)) # Generate timestamp + img_path = f"/tmp/vl-{timestamp}.jpg" # Use timestamp in filename + with open(img_path, "wb") as f: + f.write(base64.b64decode(img)) + image_paths.append(img_path) + prompt = prompt.replace(f"[img-{i}]", "<img>" + img_path + "</img>,") + else: + prompt = request.Prompt + return (prompt, image_paths) def serve(address): server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) diff --git a/backend/python/autogptq/autogptq.yml b/backend/python/autogptq/autogptq.yml index 19b8e41d..d22b354e 100644 --- a/backend/python/autogptq/autogptq.yml +++ b/backend/python/autogptq/autogptq.yml @@ -1,3 +1,7 @@ +#### +# Attention! This file is abandoned. +# Please use the ../common-env/transformers/transformers.yml file to manage dependencies.
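# (Note on the autogptq.py hunk above: recompile_vl_prompt turns OpenAI-style
# image placeholders into Qwen-VL's on-disk image syntax. Assuming a prompt
# "Describe [img-0]" and one base64 entry in request.Images, the compiled prompt
# becomes "Describe <img>/tmp/vl-1711468800000.jpg</img>," — the timestamp and
# trailing comma come from the replacement string, and the timestamp value here
# is made up for the example — and the temporary file is deleted once
# generation finishes.)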
+### name: autogptq channels: - defaults @@ -24,12 +28,12 @@ dependencies: - xz=5.4.2=h5eee18b_0 - zlib=1.2.13=h5eee18b_0 - pip: - - accelerate==0.23.0 + - accelerate==0.27.0 - aiohttp==3.8.5 - aiosignal==1.3.1 - async-timeout==4.0.3 - attrs==23.1.0 - - auto-gptq==0.4.2 + - auto-gptq==0.7.1 - certifi==2023.7.22 - charset-normalizer==3.3.0 - datasets==2.14.5 @@ -59,6 +63,7 @@ dependencies: - nvidia-nccl-cu12==2.18.1 - nvidia-nvjitlink-cu12==12.2.140 - nvidia-nvtx-cu12==12.1.105 + - optimum==1.17.1 - packaging==23.2 - pandas==2.1.1 - peft==0.5.0 @@ -75,9 +80,11 @@ dependencies: - six==1.16.0 - sympy==1.12 - tokenizers==0.14.0 - - torch==2.1.0 - tqdm==4.66.1 + - torch==2.2.1 + - torchvision==0.17.1 - transformers==4.34.0 + - transformers_stream_generator==0.0.5 - triton==2.1.0 - typing-extensions==4.8.0 - tzdata==2023.3 diff --git a/backend/python/common-env/transformers/transformers-nvidia.yml b/backend/python/common-env/transformers/transformers-nvidia.yml index 7daafe51..55361234 100644 --- a/backend/python/common-env/transformers/transformers-nvidia.yml +++ b/backend/python/common-env/transformers/transformers-nvidia.yml @@ -24,10 +24,11 @@ dependencies: - xz=5.4.2=h5eee18b_0 - zlib=1.2.13=h5eee18b_0 - pip: - - accelerate==0.23.0 + - accelerate==0.27.0 - aiohttp==3.8.5 - aiosignal==1.3.1 - async-timeout==4.0.3 + - auto-gptq==0.7.1 - attrs==23.1.0 - bark==0.1.5 - bitsandbytes==0.43.0 @@ -69,6 +70,7 @@ dependencies: - nvidia-nccl-cu12==2.18.1 - nvidia-nvjitlink-cu12==12.2.140 - nvidia-nvtx-cu12==12.1.105 + - optimum==1.17.1 - packaging==23.2 - pandas - peft==0.5.0 @@ -87,7 +89,8 @@ dependencies: - six==1.16.0 - sympy==1.12 - tokenizers - - torch==2.1.2 + - torch==2.2.1 + - torchvision==0.17.1 - torchaudio==2.1.2 - tqdm==4.66.1 - triton==2.1.0 @@ -95,7 +98,6 @@ dependencies: - tzdata==2023.3 - urllib3==1.26.17 - xxhash==3.4.1 - - auto-gptq==0.6.0 - yarl==1.9.2 - soundfile - langid @@ -116,5 +118,6 @@ dependencies: - vocos - vllm==0.3.2 - transformers>=4.38.2 # Updated Version + - transformers_stream_generator==0.0.5 - xformers==0.0.23.post1 prefix: /opt/conda/envs/transformers diff --git a/backend/python/common-env/transformers/transformers-rocm.yml b/backend/python/common-env/transformers/transformers-rocm.yml index 5c18d301..fa245bf4 100644 --- a/backend/python/common-env/transformers/transformers-rocm.yml +++ b/backend/python/common-env/transformers/transformers-rocm.yml @@ -26,7 +26,8 @@ dependencies: - pip: - --pre - --extra-index-url https://download.pytorch.org/whl/nightly/ - - accelerate==0.23.0 + - accelerate==0.27.0 + - auto-gptq==0.7.1 - aiohttp==3.8.5 - aiosignal==1.3.1 - async-timeout==4.0.3 @@ -82,7 +83,6 @@ dependencies: - triton==2.1.0 - typing-extensions==4.8.0 - tzdata==2023.3 - - auto-gptq==0.6.0 - urllib3==1.26.17 - xxhash==3.4.1 - yarl==1.9.2 @@ -90,6 +90,7 @@ dependencies: - langid - wget - unidecode + - optimum==1.17.1 - pyopenjtalk-prebuilt - pypinyin - inflect @@ -105,5 +106,6 @@ dependencies: - vocos - vllm==0.3.2 - transformers>=4.38.2 # Updated Version + - transformers_stream_generator==0.0.5 - xformers==0.0.23.post1 prefix: /opt/conda/envs/transformers diff --git a/backend/python/common-env/transformers/transformers.yml b/backend/python/common-env/transformers/transformers.yml index 5726abaf..bdf8c36f 100644 --- a/backend/python/common-env/transformers/transformers.yml +++ b/backend/python/common-env/transformers/transformers.yml @@ -24,9 +24,10 @@ dependencies: - xz=5.4.2=h5eee18b_0 - zlib=1.2.13=h5eee18b_0 - pip: - - accelerate==0.23.0 + - accelerate==0.27.0 - 
aiohttp==3.8.5 - aiosignal==1.3.1 + - auto-gptq==0.7.1 - async-timeout==4.0.3 - attrs==23.1.0 - bark==0.1.5 @@ -56,6 +57,7 @@ dependencies: - multiprocess==0.70.15 - networkx - numpy==1.26.0 + - optimum==1.17.1 - packaging==23.2 - pandas - peft==0.5.0 @@ -74,13 +76,13 @@ dependencies: - six==1.16.0 - sympy==1.12 - tokenizers - - torch==2.1.2 + - torch==2.2.1 + - torchvision==0.17.1 - torchaudio==2.1.2 - tqdm==4.66.1 - triton==2.1.0 - typing-extensions==4.8.0 - tzdata==2023.3 - - auto-gptq==0.6.0 - urllib3==1.26.17 - xxhash==3.4.1 - yarl==1.9.2 @@ -103,5 +105,6 @@ dependencies: - vocos - vllm==0.3.2 - transformers>=4.38.2 # Updated Version + - transformers_stream_generator==0.0.5 - xformers==0.0.23.post1 prefix: /opt/conda/envs/transformers From 2d7913b3bef9d6135510572e660e72dfbda29050 Mon Sep 17 00:00:00 2001 From: Steven Christou <1302212+christ66@users.noreply.github.com> Date: Tue, 26 Mar 2024 10:54:35 -0700 Subject: [PATCH 0190/2895] feat(assistant): Assistant and AssistantFiles api (#1803) * Initial implementation of assistants api * Move load/save configs to utils * Save assistant and assistantfiles config to disk. * Add tests for assistant api * Fix models path spelling mistake. * Remove personal go.mod information --------- Co-authored-by: Ettore Di Giacinto --- core/config/application_config.go | 7 + core/http/api.go | 34 +- core/http/endpoints/openai/assistant.go | 515 +++++++++++++++++++ core/http/endpoints/openai/assistant_test.go | 456 ++++++++++++++++ core/http/endpoints/openai/files.go | 64 +-- core/http/endpoints/openai/files_test.go | 45 +- main.go | 7 + pkg/utils/config.go | 41 ++ 8 files changed, 1108 insertions(+), 61 deletions(-) create mode 100644 core/http/endpoints/openai/assistant.go create mode 100644 core/http/endpoints/openai/assistant_test.go create mode 100644 pkg/utils/config.go diff --git a/core/config/application_config.go b/core/config/application_config.go index 03242c3c..c2d4e13a 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -20,6 +20,7 @@ type ApplicationConfig struct { ImageDir string AudioDir string UploadDir string + ConfigsDir string CORS bool PreloadJSONModels string PreloadModelsFromPath string @@ -252,6 +253,12 @@ func WithUploadDir(uploadDir string) AppOption { } } +func WithConfigsDir(configsDir string) AppOption { + return func(o *ApplicationConfig) { + o.ConfigsDir = configsDir + } +} + func WithApiKeys(apiKeys []string) AppOption { return func(o *ApplicationConfig) { o.ApiKeys = apiKeys diff --git a/core/http/api.go b/core/http/api.go index 039e835b..de0a4939 100644 --- a/core/http/api.go +++ b/core/http/api.go @@ -3,6 +3,7 @@ package http import ( "encoding/json" "errors" + "github.com/go-skynet/LocalAI/pkg/utils" "os" "strings" @@ -155,8 +156,17 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi }{Version: internal.PrintableVersion()}) }) - // Load upload json - openai.LoadUploadConfig(appConfig.UploadDir) + // Make sure directories exist + os.MkdirAll(appConfig.ImageDir, 0755) + os.MkdirAll(appConfig.AudioDir, 0755) + os.MkdirAll(appConfig.UploadDir, 0755) + os.MkdirAll(appConfig.ConfigsDir, 0755) + os.MkdirAll(appConfig.ModelPath, 0755) + + // Load config jsons + utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles) + utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) + utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles) modelGalleryEndpointService
:= localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint()) @@ -189,6 +199,26 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) + // assistant + app.Get("/v1/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig)) + app.Get("/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig)) + app.Post("/v1/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig)) + app.Post("/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig)) + app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig)) + app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig)) + app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig)) + app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) + app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) + app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) + app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) + app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig)) + // files app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) diff --git a/core/http/endpoints/openai/assistant.go b/core/http/endpoints/openai/assistant.go new file mode 100644 index 00000000..0e0d8a99 --- /dev/null +++ b/core/http/endpoints/openai/assistant.go @@ -0,0 +1,515 @@ +package openai + +import ( + "fmt" + "github.com/go-skynet/LocalAI/core/config" + model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/gofiber/fiber/v2" + "github.com/rs/zerolog/log" + "net/http" + "sort" + "strconv" + "strings" + "sync/atomic" + "time" +) + +// ToolType defines a type for tool options +type ToolType string + +const ( + CodeInterpreter ToolType = "code_interpreter" + Retrieval ToolType = "retrieval" + Function ToolType = "function" + + MaxCharacterInstructions = 32768 + MaxCharacterDescription = 512 + MaxCharacterName = 256 + MaxToolsSize = 128 + MaxFileIdSize = 20 + MaxCharacterMetadataKey = 64 + MaxCharacterMetadataValue = 512 +) + +type Tool struct { + Type ToolType `json:"type"` +} + +// Assistant represents the structure of an assistant object from the OpenAI API. +type Assistant struct { + ID string `json:"id"` // The unique identifier of the assistant. + Object string `json:"object"` // Object type, which is "assistant". 
+ Created int64 `json:"created"` // The time at which the assistant was created. + Model string `json:"model"` // The model ID used by the assistant. + Name string `json:"name,omitempty"` // The name of the assistant. + Description string `json:"description,omitempty"` // The description of the assistant. + Instructions string `json:"instructions,omitempty"` // The system instructions that the assistant uses. + Tools []Tool `json:"tools,omitempty"` // A list of tools enabled on the assistant. + FileIDs []string `json:"file_ids,omitempty"` // A list of file IDs attached to this assistant. + Metadata map[string]string `json:"metadata,omitempty"` // Set of key-value pairs attached to the assistant. +} + +var ( + Assistants = []Assistant{} // better to return empty array instead of "null" + AssistantsConfigFile = "assistants.json" +) + +type AssistantRequest struct { + Model string `json:"model"` + Name string `json:"name,omitempty"` + Description string `json:"description,omitempty"` + Instructions string `json:"instructions,omitempty"` + Tools []Tool `json:"tools,omitempty"` + FileIDs []string `json:"file_ids,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` +} + +func CreateAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + request := new(AssistantRequest) + if err := c.BodyParser(request); err != nil { + log.Warn().AnErr("Unable to parse AssistantRequest", err) + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"}) + } + + if !modelExists(ml, request.Model) { + log.Warn().Msgf("Model: %s was not found in list of models.", request.Model) + return c.Status(fiber.StatusBadRequest).SendString("Model " + request.Model + " not found") + } + + if request.Tools == nil { + request.Tools = []Tool{} + } + + if request.FileIDs == nil { + request.FileIDs = []string{} + } + + if request.Metadata == nil { + request.Metadata = make(map[string]string) + } + + id := "asst_" + strconv.FormatInt(generateRandomID(), 10) + + assistant := Assistant{ + ID: id, + Object: "assistant", + Created: time.Now().Unix(), + Model: request.Model, + Name: request.Name, + Description: request.Description, + Instructions: request.Instructions, + Tools: request.Tools, + FileIDs: request.FileIDs, + Metadata: request.Metadata, + } + + Assistants = append(Assistants, assistant) + utils.SaveConfig(appConfig.ConfigsDir, AssistantsConfigFile, Assistants) + return c.Status(fiber.StatusOK).JSON(assistant) + } +} + +var currentId int64 = 0 + +func generateRandomID() int64 { + atomic.AddInt64(&currentId, 1) + return currentId +} + +func ListAssistantsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + // Because we're altering the existing assistants list we should just duplicate it for now.
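	// Note: the assignment below copies only the slice header, not the backing
	// array, so the in-place sort further down still reorders the shared
	// package-level Assistants slice. A real duplicate would need, for example:
	//   dup := make([]Assistant, len(Assistants))
	//   copy(dup, Assistants)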
+ returnAssistants := Assistants + // Parse query parameters + limitQuery := c.Query("limit", "20") + orderQuery := c.Query("order", "desc") + afterQuery := c.Query("after") + beforeQuery := c.Query("before") + + // Convert string limit to integer + limit, err := strconv.Atoi(limitQuery) + if err != nil { + return c.Status(http.StatusBadRequest).SendString(fmt.Sprintf("Invalid limit query value: %s", limitQuery)) + } + + // Sort assistants + sort.SliceStable(returnAssistants, func(i, j int) bool { + if orderQuery == "asc" { + return returnAssistants[i].Created < returnAssistants[j].Created + } + return returnAssistants[i].Created > returnAssistants[j].Created + }) + + // After and before cursors + if afterQuery != "" { + returnAssistants = filterAssistantsAfterID(returnAssistants, afterQuery) + } + if beforeQuery != "" { + returnAssistants = filterAssistantsBeforeID(returnAssistants, beforeQuery) + } + + // Apply limit + if limit < len(returnAssistants) { + returnAssistants = returnAssistants[:limit] + } + + return c.JSON(returnAssistants) + } +} + +// FilterAssistantsBeforeID filters out those assistants whose ID comes before the given ID +// We assume that the assistants are already sorted +func filterAssistantsBeforeID(assistants []Assistant, id string) []Assistant { + idInt, err := strconv.Atoi(id) + if err != nil { + return assistants // Return original slice if invalid id format is provided + } + + var filteredAssistants []Assistant + + for _, assistant := range assistants { + aid, err := strconv.Atoi(strings.TrimPrefix(assistant.ID, "asst_")) + if err != nil { + continue // Skip if invalid id in assistant + } + + if aid < idInt { + filteredAssistants = append(filteredAssistants, assistant) + } + } + + return filteredAssistants +} + +// FilterAssistantsAfterID filters out those assistants whose ID comes after the given ID +// We assume that the assistants are already sorted +func filterAssistantsAfterID(assistants []Assistant, id string) []Assistant { + idInt, err := strconv.Atoi(id) + if err != nil { + return assistants // Return original slice if invalid id format is provided + } + + var filteredAssistants []Assistant + + for _, assistant := range assistants { + aid, err := strconv.Atoi(strings.TrimPrefix(assistant.ID, "asst_")) + if err != nil { + continue // Skip if invalid id in assistant + } + + if aid > idInt { + filteredAssistants = append(filteredAssistants, assistant) + } + } + + return filteredAssistants +} + +func modelExists(ml *model.ModelLoader, modelName string) (found bool) { + found = false + models, err := ml.ListModels() + if err != nil { + return + } + + for _, model := range models { + if model == modelName { + found = true + return + } + } + return +} + +func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + type DeleteAssistantResponse struct { + ID string `json:"id"` + Object string `json:"object"` + Deleted bool `json:"deleted"` + } + + return func(c *fiber.Ctx) error { + assistantID := c.Params("assistant_id") + if assistantID == "" { + return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required") + } + + for i, assistant := range Assistants { + if assistant.ID == assistantID { + Assistants = append(Assistants[:i], Assistants[i+1:]...) 
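	// (append(s[:i], s[i+1:]...) is Go's idiomatic in-place removal of element i:
	// it shifts the tail left by one slot and reuses the existing backing array.)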
+ utils.SaveConfig(appConfig.ConfigsDir, AssistantsConfigFile, Assistants) + return c.Status(fiber.StatusOK).JSON(DeleteAssistantResponse{ + ID: assistantID, + Object: "assistant.deleted", + Deleted: true, + }) + } + } + + log.Warn().Msgf("Unable to find assistant %s for deletion", assistantID) + return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantResponse{ + ID: assistantID, + Object: "assistant.deleted", + Deleted: false, + }) + } +} + +func GetAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + assistantID := c.Params("assistant_id") + if assistantID == "" { + return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required") + } + + for _, assistant := range Assistants { + if assistant.ID == assistantID { + return c.Status(fiber.StatusOK).JSON(assistant) + } + } + + return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant with id: %s", assistantID)) + } +} + +type AssistantFile struct { + ID string `json:"id"` + Object string `json:"object"` + CreatedAt int64 `json:"created_at"` + AssistantID string `json:"assistant_id"` +} + +var ( + AssistantFiles []AssistantFile + AssistantsFileConfigFile = "assistantsFile.json" +) + +type AssistantFileRequest struct { + FileID string `json:"file_id"` +} + +type DeleteAssistantFileResponse struct { + ID string `json:"id"` + Object string `json:"object"` + Deleted bool `json:"deleted"` +} + +func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + request := new(AssistantFileRequest) + if err := c.BodyParser(request); err != nil { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"}) + } + + assistantID := c.Params("assistant_id") + if assistantID == "" { + return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required") + } + + for _, assistant := range Assistants { + if assistant.ID == assistantID { + if len(assistant.FileIDs) > MaxFileIdSize { + return c.Status(fiber.StatusBadRequest).SendString(fmt.Sprintf("Max files %d for assistant %s reached.", MaxFileIdSize, assistant.Name)) + } + + for _, file := range UploadedFiles { + if file.ID == request.FileID { + assistant.FileIDs = append(assistant.FileIDs, request.FileID) + assistantFile := AssistantFile{ + ID: file.ID, + Object: "assistant.file", + CreatedAt: time.Now().Unix(), + AssistantID: assistant.ID, + } + AssistantFiles = append(AssistantFiles, assistantFile) + utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles) + return c.Status(fiber.StatusOK).JSON(assistantFile) + } + } + + return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find file_id: %s", request.FileID)) + } + } + + return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant with id: %s", assistantID)) + } +} + +func ListAssistantFilesEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + type ListAssistantFiles struct { + Data []File + Object string + } + + return func(c *fiber.Ctx) error { + assistantID := c.Params("assistant_id") + if assistantID == "" { + return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required") + } + + limitQuery := c.Query("limit", "20") + order := c.Query("order", "desc") + limit, err :=
strconv.Atoi(limitQuery) + if err != nil || limit < 1 || limit > 100 { + limit = 20 // Default to 20 if there's an error or the limit is out of bounds + } + + // Sort files by CreatedAt depending on the order query parameter + if order == "asc" { + sort.Slice(AssistantFiles, func(i, j int) bool { + return AssistantFiles[i].CreatedAt < AssistantFiles[j].CreatedAt + }) + } else { // default to "desc" + sort.Slice(AssistantFiles, func(i, j int) bool { + return AssistantFiles[i].CreatedAt > AssistantFiles[j].CreatedAt + }) + } + + // Limit the number of files returned + var limitedFiles []AssistantFile + hasMore := false + if len(AssistantFiles) > limit { + hasMore = true + limitedFiles = AssistantFiles[:limit] + } else { + limitedFiles = AssistantFiles + } + + response := map[string]interface{}{ + "object": "list", + "data": limitedFiles, + "first_id": func() string { + if len(limitedFiles) > 0 { + return limitedFiles[0].ID + } + return "" + }(), + "last_id": func() string { + if len(limitedFiles) > 0 { + return limitedFiles[len(limitedFiles)-1].ID + } + return "" + }(), + "has_more": hasMore, + } + + return c.Status(fiber.StatusOK).JSON(response) + } +} + +func ModifyAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + request := new(AssistantRequest) + if err := c.BodyParser(request); err != nil { + log.Warn().AnErr("Unable to parse AssistantRequest", err) + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"}) + } + + assistantID := c.Params("assistant_id") + if assistantID == "" { + return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required") + } + + for i, assistant := range Assistants { + if assistant.ID == assistantID { + newAssistant := Assistant{ + ID: assistantID, + Object: assistant.Object, + Created: assistant.Created, + Model: request.Model, + Name: request.Name, + Description: request.Description, + Instructions: request.Instructions, + Tools: request.Tools, + FileIDs: request.FileIDs, // todo: should probably verify fileids exist + Metadata: request.Metadata, + } + + // Remove old one and replace with new one + Assistants = append(Assistants[:i], Assistants[i+1:]...) + Assistants = append(Assistants, newAssistant) + utils.SaveConfig(appConfig.ConfigsDir, AssistantsConfigFile, Assistants) + return c.Status(fiber.StatusOK).JSON(newAssistant) + } + } + return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant with id: %s", assistantID)) + } +} + +func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + assistantID := c.Params("assistant_id") + fileId := c.Params("file_id") + if assistantID == "" { + return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id and file_id are required") + } + // First remove file from assistant + for i, assistant := range Assistants { + if assistant.ID == assistantID { + for j, attachedFileId := range assistant.FileIDs { + if attachedFileId == fileId { + Assistants[i].FileIDs = append(Assistants[i].FileIDs[:j], Assistants[i].FileIDs[j+1:]...) + + // Check if the file exists in the assistantFiles slice + for k, assistantFile := range AssistantFiles { + if assistantFile.ID == fileId { + // Remove the file from the assistantFiles slice + AssistantFiles = append(AssistantFiles[:k], AssistantFiles[k+1:]...)
+ utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles) + return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{ + ID: fileId, + Object: "assistant.file.deleted", + Deleted: true, + }) + } + } + } + } + + log.Warn().Msgf("Unable to locate file_id: %s in assistants: %s. Continuing to delete assistant file.", fileId, assistantID) + for i, assistantFile := range AssistantFiles { + if assistantFile.AssistantID == assistantID { + + AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...) + utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles) + + return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantFileResponse{ + ID: fileId, + Object: "assistant.file.deleted", + Deleted: true, + }) + } + } + } + } + log.Warn().Msgf("Unable to find assistant: %s", assistantID) + + return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantFileResponse{ + ID: fileId, + Object: "assistant.file.deleted", + Deleted: false, + }) + } +} + +func GetAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + assistantID := c.Params("assistant_id") + fileId := c.Params("file_id") + if assistantID == "" { + return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id and file_id are required") + } + + for _, assistantFile := range AssistantFiles { + if assistantFile.AssistantID == assistantID { + if assistantFile.ID == fileId { + return c.Status(fiber.StatusOK).JSON(assistantFile) + } + return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant file with file_id: %s", fileId)) + } + } + return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant file with assistant_id: %s", assistantID)) + } +} diff --git a/core/http/endpoints/openai/assistant_test.go b/core/http/endpoints/openai/assistant_test.go new file mode 100644 index 00000000..bdc41dda --- /dev/null +++ b/core/http/endpoints/openai/assistant_test.go @@ -0,0 +1,456 @@ +package openai + +import ( + "encoding/json" + "fmt" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" + "github.com/stretchr/testify/assert" + "io" + "io/ioutil" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "testing" + "time" +) + +var configsDir string = "/tmp/localai/configs" + +type MockLoader struct { + models []string +} + +func tearDown() func() { + return func() { + UploadedFiles = []File{} + Assistants = []Assistant{} + AssistantFiles = []AssistantFile{} + _ = os.Remove(filepath.Join(configsDir, AssistantsConfigFile)) + _ = os.Remove(filepath.Join(configsDir, AssistantsFileConfigFile)) + } +} + +func TestAssistantEndpoints(t *testing.T) { + // Preparing the mocked objects + cl := &config.BackendConfigLoader{} + //configsDir := "/tmp/localai/configs" + modelPath := "/tmp/localai/model" + var ml = model.NewModelLoader(modelPath) + + appConfig := &config.ApplicationConfig{ + ConfigsDir: configsDir, + UploadLimitMB: 10, + UploadDir: "test_dir", + ModelPath: modelPath, + } + + _ = os.RemoveAll(appConfig.ConfigsDir) + _ = os.MkdirAll(appConfig.ConfigsDir, 0755) + _ = os.MkdirAll(modelPath, 0755) + os.Create(filepath.Join(modelPath, "ggml-gpt4all-j")) + + app := fiber.New(fiber.Config{ + BodyLimit: 20 * 1024 * 1024, // sets the limit to 20MB. 
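	// 20 MiB of request-body room leaves headroom over the 10 MB UploadLimitMB
	// configured above, so only deliberately oversized uploads should trip the
	// body-size guard rather than these tests' 5 MB fixtures.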
+ }) + + // Create a Test Server + app.Get("/assistants", ListAssistantsEndpoint(cl, ml, appConfig)) + app.Post("/assistants", CreateAssistantEndpoint(cl, ml, appConfig)) + app.Delete("/assistants/:assistant_id", DeleteAssistantEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id", GetAssistantEndpoint(cl, ml, appConfig)) + app.Post("/assistants/:assistant_id", ModifyAssistantEndpoint(cl, ml, appConfig)) + + app.Post("/files", UploadFilesEndpoint(cl, appConfig)) + app.Get("/assistants/:assistant_id/files", ListAssistantFilesEndpoint(cl, ml, appConfig)) + app.Post("/assistants/:assistant_id/files", CreateAssistantFileEndpoint(cl, ml, appConfig)) + app.Delete("/assistants/:assistant_id/files/:file_id", DeleteAssistantFileEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id/files/:file_id", GetAssistantFileEndpoint(cl, ml, appConfig)) + + t.Run("CreateAssistantEndpoint", func(t *testing.T) { + t.Cleanup(tearDown()) + ar := &AssistantRequest{ + Model: "ggml-gpt4all-j", + Name: "3.5-turbo", + Description: "Test Assistant", + Instructions: "You are computer science teacher answering student questions", + Tools: []Tool{{Type: Function}}, + FileIDs: nil, + Metadata: nil, + } + + resultAssistant, resp, err := createAssistant(app, *ar) + assert.NoError(t, err) + assert.Equal(t, fiber.StatusOK, resp.StatusCode) + + assert.Equal(t, 1, len(Assistants)) + //t.Cleanup(cleanupAllAssistants(t, app, []string{resultAssistant.ID})) + + assert.Equal(t, ar.Name, resultAssistant.Name) + assert.Equal(t, ar.Model, resultAssistant.Model) + assert.Equal(t, ar.Tools, resultAssistant.Tools) + assert.Equal(t, ar.Description, resultAssistant.Description) + assert.Equal(t, ar.Instructions, resultAssistant.Instructions) + assert.Equal(t, ar.FileIDs, resultAssistant.FileIDs) + assert.Equal(t, ar.Metadata, resultAssistant.Metadata) + }) + + t.Run("ListAssistantsEndpoint", func(t *testing.T) { + var ids []string + var resultAssistant []Assistant + for i := 0; i < 4; i++ { + ar := &AssistantRequest{ + Model: "ggml-gpt4all-j", + Name: fmt.Sprintf("3.5-turbo-%d", i), + Description: fmt.Sprintf("Test Assistant - %d", i), + Instructions: fmt.Sprintf("You are computer science teacher answering student questions - %d", i), + Tools: []Tool{{Type: Function}}, + FileIDs: []string{"fid-1234"}, + Metadata: map[string]string{"meta": "data"}, + } + + //var err error + ra, _, err := createAssistant(app, *ar) + // Because we create the assistants so fast all end up with the same created time. 
+ time.Sleep(time.Second) + resultAssistant = append(resultAssistant, ra) + assert.NoError(t, err) + ids = append(ids, resultAssistant[i].ID) + } + + t.Cleanup(cleanupAllAssistants(t, app, ids)) + + tests := []struct { + name string + reqURL string + expectedStatus int + expectedResult []Assistant + expectedStringResult string + }{ + { + name: "Valid Usage - limit only", + reqURL: "/assistants?limit=2", + expectedStatus: http.StatusOK, + expectedResult: Assistants[:2], // Expecting the first two assistants + }, + { + name: "Valid Usage - order asc", + reqURL: "/assistants?order=asc", + expectedStatus: http.StatusOK, + expectedResult: Assistants, // Expecting all assistants in ascending order + }, + { + name: "Valid Usage - order desc", + reqURL: "/assistants?order=desc", + expectedStatus: http.StatusOK, + expectedResult: []Assistant{Assistants[3], Assistants[2], Assistants[1], Assistants[0]}, // Expecting all assistants in descending order + }, + { + name: "Valid Usage - after specific ID", + reqURL: "/assistants?after=2", + expectedStatus: http.StatusOK, + // Note this is correct because it's put in descending order already + expectedResult: Assistants[:3], // Expecting assistants after (excluding) ID 2 + }, + { + name: "Valid Usage - before specific ID", + reqURL: "/assistants?before=4", + expectedStatus: http.StatusOK, + expectedResult: Assistants[2:], // Expecting assistants before (excluding) ID 3. + }, + { + name: "Invalid Usage - non-integer limit", + reqURL: "/assistants?limit=two", + expectedStatus: http.StatusBadRequest, + expectedStringResult: "Invalid limit query value: two", + }, + { + name: "Invalid Usage - non-existing id in after", + reqURL: "/assistants?after=100", + expectedStatus: http.StatusOK, + expectedResult: []Assistant(nil), // Expecting empty list as there are no IDs above 100 + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + request := httptest.NewRequest(http.MethodGet, tt.reqURL, nil) + response, err := app.Test(request) + assert.NoError(t, err) + assert.Equal(t, tt.expectedStatus, response.StatusCode) + if tt.expectedStatus != fiber.StatusOK { + all, _ := ioutil.ReadAll(response.Body) + assert.Equal(t, tt.expectedStringResult, string(all)) + } else { + var result []Assistant + err = json.NewDecoder(response.Body).Decode(&result) + assert.NoError(t, err) + + assert.Equal(t, tt.expectedResult, result) + } + }) + } + }) + + t.Run("DeleteAssistantEndpoint", func(t *testing.T) { + ar := &AssistantRequest{ + Model: "ggml-gpt4all-j", + Name: "3.5-turbo", + Description: "Test Assistant", + Instructions: "You are computer science teacher answering student questions", + Tools: []Tool{{Type: Function}}, + FileIDs: nil, + Metadata: nil, + } + + resultAssistant, _, err := createAssistant(app, *ar) + assert.NoError(t, err) + + target := fmt.Sprintf("/assistants/%s", resultAssistant.ID) + deleteReq := httptest.NewRequest(http.MethodDelete, target, nil) + _, err = app.Test(deleteReq) + assert.NoError(t, err) + assert.Equal(t, 0, len(Assistants)) + }) + + t.Run("GetAssistantEndpoint", func(t *testing.T) { + ar := &AssistantRequest{ + Model: "ggml-gpt4all-j", + Name: "3.5-turbo", + Description: "Test Assistant", + Instructions: "You are computer science teacher answering student questions", + Tools: []Tool{{Type: Function}}, + FileIDs: nil, + Metadata: nil, + } + + resultAssistant, _, err := createAssistant(app, *ar) + assert.NoError(t, err) + t.Cleanup(cleanupAllAssistants(t, app, []string{resultAssistant.ID})) + + target := 
fmt.Sprintf("/assistants/%s", resultAssistant.ID) + request := httptest.NewRequest(http.MethodGet, target, nil) + response, err := app.Test(request) + assert.NoError(t, err) + + var getAssistant Assistant + err = json.NewDecoder(response.Body).Decode(&getAssistant) + assert.NoError(t, err) + + assert.Equal(t, resultAssistant.ID, getAssistant.ID) + }) + + t.Run("ModifyAssistantEndpoint", func(t *testing.T) { + ar := &AssistantRequest{ + Model: "ggml-gpt4all-j", + Name: "3.5-turbo", + Description: "Test Assistant", + Instructions: "You are computer science teacher answering student questions", + Tools: []Tool{{Type: Function}}, + FileIDs: nil, + Metadata: nil, + } + + resultAssistant, _, err := createAssistant(app, *ar) + assert.NoError(t, err) + + modifiedAr := &AssistantRequest{ + Model: "ggml-gpt4all-j", + Name: "4.0-turbo", + Description: "Modified Test Assistant", + Instructions: "You are math teacher answering student questions", + Tools: []Tool{{Type: CodeInterpreter}}, + FileIDs: nil, + Metadata: nil, + } + + modifiedArJson, err := json.Marshal(modifiedAr) + assert.NoError(t, err) + + target := fmt.Sprintf("/assistants/%s", resultAssistant.ID) + request := httptest.NewRequest(http.MethodPost, target, strings.NewReader(string(modifiedArJson))) + request.Header.Set(fiber.HeaderContentType, "application/json") + + modifyResponse, err := app.Test(request) + assert.NoError(t, err) + var getAssistant Assistant + err = json.NewDecoder(modifyResponse.Body).Decode(&getAssistant) + + t.Cleanup(cleanupAllAssistants(t, app, []string{getAssistant.ID})) + + assert.Equal(t, resultAssistant.ID, getAssistant.ID) // IDs should match even if contents change + assert.Equal(t, modifiedAr.Tools, getAssistant.Tools) + assert.Equal(t, modifiedAr.Name, getAssistant.Name) + assert.Equal(t, modifiedAr.Instructions, getAssistant.Instructions) + assert.Equal(t, modifiedAr.Description, getAssistant.Description) + }) + + t.Run("CreateAssistantFileEndpoint", func(t *testing.T) { + t.Cleanup(tearDown()) + file, assistant, err := createFileAndAssistant(t, app, appConfig) + assert.NoError(t, err) + + afr := AssistantFileRequest{FileID: file.ID} + af, _, err := createAssistantFile(app, afr, assistant.ID) + + assert.NoError(t, err) + assert.Equal(t, assistant.ID, af.AssistantID) + }) + t.Run("ListAssistantFilesEndpoint", func(t *testing.T) { + t.Cleanup(tearDown()) + file, assistant, err := createFileAndAssistant(t, app, appConfig) + assert.NoError(t, err) + + afr := AssistantFileRequest{FileID: file.ID} + af, _, err := createAssistantFile(app, afr, assistant.ID) + assert.NoError(t, err) + + assert.Equal(t, assistant.ID, af.AssistantID) + }) + t.Run("GetAssistantFileEndpoint", func(t *testing.T) { + t.Cleanup(tearDown()) + file, assistant, err := createFileAndAssistant(t, app, appConfig) + assert.NoError(t, err) + + afr := AssistantFileRequest{FileID: file.ID} + af, _, err := createAssistantFile(app, afr, assistant.ID) + assert.NoError(t, err) + t.Cleanup(cleanupAssistantFile(t, app, af.ID, af.AssistantID)) + + target := fmt.Sprintf("/assistants/%s/files/%s", assistant.ID, file.ID) + request := httptest.NewRequest(http.MethodGet, target, nil) + response, err := app.Test(request) + assert.NoError(t, err) + + var assistantFile AssistantFile + err = json.NewDecoder(response.Body).Decode(&assistantFile) + assert.NoError(t, err) + + assert.Equal(t, af.ID, assistantFile.ID) + assert.Equal(t, af.AssistantID, assistantFile.AssistantID) + }) + t.Run("DeleteAssistantFileEndpoint", func(t *testing.T) { + t.Cleanup(tearDown()) + 
file, assistant, err := createFileAndAssistant(t, app, appConfig) + assert.NoError(t, err) + + afr := AssistantFileRequest{FileID: file.ID} + af, _, err := createAssistantFile(app, afr, assistant.ID) + assert.NoError(t, err) + + cleanupAssistantFile(t, app, af.ID, af.AssistantID)() + + assert.Empty(t, AssistantFiles) + }) + +} + +func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationConfig) (File, Assistant, error) { + ar := &AssistantRequest{ + Model: "ggml-gpt4all-j", + Name: "3.5-turbo", + Description: "Test Assistant", + Instructions: "You are computer science teacher answering student questions", + Tools: []Tool{{Type: Function}}, + FileIDs: nil, + Metadata: nil, + } + + assistant, _, err := createAssistant(app, *ar) + if err != nil { + return File{}, Assistant{}, err + } + t.Cleanup(cleanupAllAssistants(t, app, []string{assistant.ID})) + + file := CallFilesUploadEndpointWithCleanup(t, app, "test.txt", "file", "fine-tune", 5, o) + t.Cleanup(func() { + _, err := CallFilesDeleteEndpoint(t, app, file.ID) + assert.NoError(t, err) + }) + return file, assistant, nil +} + +func createAssistantFile(app *fiber.App, afr AssistantFileRequest, assistantId string) (AssistantFile, *http.Response, error) { + afrJson, err := json.Marshal(afr) + if err != nil { + return AssistantFile{}, nil, err + } + + target := fmt.Sprintf("/assistants/%s/files", assistantId) + request := httptest.NewRequest(http.MethodPost, target, strings.NewReader(string(afrJson))) + request.Header.Set(fiber.HeaderContentType, "application/json") + request.Header.Set("OpenAi-Beta", "assistants=v1") + + resp, err := app.Test(request) + if err != nil { + return AssistantFile{}, resp, err + } + + var assistantFile AssistantFile + all, err := ioutil.ReadAll(resp.Body) + err = json.NewDecoder(strings.NewReader(string(all))).Decode(&assistantFile) + if err != nil { + return AssistantFile{}, resp, err + } + + return assistantFile, resp, nil +} + +func createAssistant(app *fiber.App, ar AssistantRequest) (Assistant, *http.Response, error) { + assistant, err := json.Marshal(ar) + if err != nil { + return Assistant{}, nil, err + } + + request := httptest.NewRequest(http.MethodPost, "/assistants", strings.NewReader(string(assistant))) + request.Header.Set(fiber.HeaderContentType, "application/json") + request.Header.Set("OpenAi-Beta", "assistants=v1") + + resp, err := app.Test(request) + if err != nil { + return Assistant{}, resp, err + } + + bodyString, err := io.ReadAll(resp.Body) + if err != nil { + return Assistant{}, resp, err + } + + var resultAssistant Assistant + err = json.NewDecoder(strings.NewReader(string(bodyString))).Decode(&resultAssistant) + + return resultAssistant, resp, nil +} + +func cleanupAllAssistants(t *testing.T, app *fiber.App, ids []string) func() { + return func() { + for _, assistant := range ids { + target := fmt.Sprintf("/assistants/%s", assistant) + deleteReq := httptest.NewRequest(http.MethodDelete, target, nil) + _, err := app.Test(deleteReq) + if err != nil { + t.Fatalf("Failed to delete assistant %s: %v", assistant, err) + } + } + } +} + +func cleanupAssistantFile(t *testing.T, app *fiber.App, fileId, assistantId string) func() { + return func() { + target := fmt.Sprintf("/assistants/%s/files/%s", assistantId, fileId) + request := httptest.NewRequest(http.MethodDelete, target, nil) + request.Header.Set(fiber.HeaderContentType, "application/json") + request.Header.Set("OpenAi-Beta", "assistants=v1") + + resp, err := app.Test(request) + assert.NoError(t, err) + + var dafr 
DeleteAssistantFileResponse + err = json.NewDecoder(resp.Body).Decode(&dafr) + assert.NoError(t, err) + assert.True(t, dafr.Deleted) + } +} diff --git a/core/http/endpoints/openai/files.go b/core/http/endpoints/openai/files.go index 5cb8d7a9..add9aaa0 100644 --- a/core/http/endpoints/openai/files.go +++ b/core/http/endpoints/openai/files.go @@ -1,23 +1,22 @@ package openai import ( - "encoding/json" "errors" "fmt" "os" "path/filepath" + "sync/atomic" "time" "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/pkg/utils" "github.com/gofiber/fiber/v2" - "github.com/rs/zerolog/log" ) -var uploadedFiles []File +var UploadedFiles []File -const uploadedFilesFile = "uploadedFiles.json" +const UploadedFilesFile = "uploadedFiles.json" // File represents the structure of a file object from the OpenAI API. type File struct { @@ -29,38 +28,6 @@ type File struct { Purpose string `json:"purpose"` // The purpose of the file (e.g., "fine-tune", "classifications", etc.) } -func saveUploadConfig(uploadDir string) { - file, err := json.MarshalIndent(uploadedFiles, "", " ") - if err != nil { - log.Error().Msgf("Failed to JSON marshal the uploadedFiles: %s", err) - } - - err = os.WriteFile(filepath.Join(uploadDir, uploadedFilesFile), file, 0644) - if err != nil { - log.Error().Msgf("Failed to save uploadedFiles to file: %s", err) - } -} - -func LoadUploadConfig(uploadPath string) { - uploadFilePath := filepath.Join(uploadPath, uploadedFilesFile) - - _, err := os.Stat(uploadFilePath) - if os.IsNotExist(err) { - log.Debug().Msgf("No uploadedFiles file found at %s", uploadFilePath) - return - } - - file, err := os.ReadFile(uploadFilePath) - if err != nil { - log.Error().Msgf("Failed to read file: %s", err) - } else { - err = json.Unmarshal(file, &uploadedFiles) - if err != nil { - log.Error().Msgf("Failed to JSON unmarshal the file into uploadedFiles: %s", err) - } - } -} - // UploadFilesEndpoint https://platform.openai.com/docs/api-reference/files/create func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { @@ -95,7 +62,7 @@ func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli } f := File{ - ID: fmt.Sprintf("file-%d", time.Now().Unix()), + ID: fmt.Sprintf("file-%d", getNextFileId()), Object: "file", Bytes: int(file.Size), CreatedAt: time.Now(), @@ -103,12 +70,19 @@ func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli Purpose: purpose, } - uploadedFiles = append(uploadedFiles, f) - saveUploadConfig(appConfig.UploadDir) + UploadedFiles = append(UploadedFiles, f) + utils.SaveConfig(appConfig.UploadDir, UploadedFilesFile, UploadedFiles) return c.Status(fiber.StatusOK).JSON(f) } } +var currentFileId int64 = 0 + +func getNextFileId() int64 { + atomic.AddInt64(&currentFileId, 1) + return currentFileId +} + // ListFilesEndpoint https://platform.openai.com/docs/api-reference/files/list func ListFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { type ListFiles struct { @@ -121,9 +95,9 @@ func ListFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Applica purpose := c.Query("purpose") if purpose == "" { - listFiles.Data = uploadedFiles + listFiles.Data = UploadedFiles } else { - for _, f := range uploadedFiles { + for _, f := range UploadedFiles { if purpose == f.Purpose { listFiles.Data = append(listFiles.Data, f) } @@ -140,7 +114,7 @@ func getFileFromRequest(c *fiber.Ctx) (*File, error) {
return nil, fmt.Errorf("file_id parameter is required") } - for _, f := range uploadedFiles { + for _, f := range UploadedFiles { if id == f.ID { return &f, nil } @@ -184,14 +158,14 @@ func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli } // Remove upload from list - for i, f := range uploadedFiles { + for i, f := range UploadedFiles { if f.ID == file.ID { - uploadedFiles = append(uploadedFiles[:i], uploadedFiles[i+1:]...) + UploadedFiles = append(UploadedFiles[:i], UploadedFiles[i+1:]...) break } } - saveUploadConfig(appConfig.UploadDir) + utils.SaveConfig(appConfig.UploadDir, UploadedFilesFile, UploadedFiles) return c.JSON(DeleteStatus{ Id: file.ID, Object: "file", diff --git a/core/http/endpoints/openai/files_test.go b/core/http/endpoints/openai/files_test.go index a036bd0d..e1c1011e 100644 --- a/core/http/endpoints/openai/files_test.go +++ b/core/http/endpoints/openai/files_test.go @@ -3,6 +3,7 @@ package openai import ( "encoding/json" "fmt" + "github.com/rs/zerolog/log" "io" "mime/multipart" "net/http" @@ -73,6 +74,7 @@ func TestUploadFileExceedSizeLimit(t *testing.T) { app.Get("/files/:file_id/content", GetFilesContentsEndpoint(loader, option)) t.Run("UploadFilesEndpoint file size exceeds limit", func(t *testing.T) { + t.Cleanup(tearDown()) resp, err := CallFilesUploadEndpoint(t, app, "foo.txt", "file", "fine-tune", 11, option) assert.NoError(t, err) @@ -80,46 +82,54 @@ func TestUploadFileExceedSizeLimit(t *testing.T) { assert.Contains(t, bodyToString(resp, t), "exceeds upload limit") }) t.Run("UploadFilesEndpoint purpose not defined", func(t *testing.T) { + t.Cleanup(tearDown()) resp, _ := CallFilesUploadEndpoint(t, app, "foo.txt", "file", "", 5, option) assert.Equal(t, fiber.StatusBadRequest, resp.StatusCode) assert.Contains(t, bodyToString(resp, t), "Purpose is not defined") }) t.Run("UploadFilesEndpoint file already exists", func(t *testing.T) { + t.Cleanup(tearDown()) f1 := CallFilesUploadEndpointWithCleanup(t, app, "foo.txt", "file", "fine-tune", 5, option) resp, err := CallFilesUploadEndpoint(t, app, "foo.txt", "file", "fine-tune", 5, option) fmt.Println(f1) - fmt.Printf("ERror: %v", err) + fmt.Printf("ERror: %v\n", err) + fmt.Printf("resp: %+v\n", resp) assert.Equal(t, fiber.StatusBadRequest, resp.StatusCode) assert.Contains(t, bodyToString(resp, t), "File already exists") }) t.Run("UploadFilesEndpoint file uploaded successfully", func(t *testing.T) { + t.Cleanup(tearDown()) file := CallFilesUploadEndpointWithCleanup(t, app, "test.txt", "file", "fine-tune", 5, option) // Check if file exists in the disk - filePath := filepath.Join(option.UploadDir, utils2.SanitizeFileName("test.txt")) + testName := strings.Split(t.Name(), "/")[1] + fileName := testName + "-test.txt" + filePath := filepath.Join(option.UploadDir, utils2.SanitizeFileName(fileName)) _, err := os.Stat(filePath) assert.False(t, os.IsNotExist(err)) assert.Equal(t, file.Bytes, 5242880) assert.NotEmpty(t, file.CreatedAt) - assert.Equal(t, file.Filename, "test.txt") + assert.Equal(t, file.Filename, fileName) assert.Equal(t, file.Purpose, "fine-tune") }) t.Run("ListFilesEndpoint without purpose parameter", func(t *testing.T) { + t.Cleanup(tearDown()) resp, err := CallListFilesEndpoint(t, app, "") assert.NoError(t, err) assert.Equal(t, 200, resp.StatusCode) listFiles := responseToListFile(t, resp) - if len(listFiles.Data) != len(uploadedFiles) { - t.Errorf("Expected %v files, got %v files", len(uploadedFiles), len(listFiles.Data)) + if len(listFiles.Data) != len(UploadedFiles) { + 
t.Errorf("Expected %v files, got %v files", len(UploadedFiles), len(listFiles.Data)) } }) t.Run("ListFilesEndpoint with valid purpose parameter", func(t *testing.T) { + t.Cleanup(tearDown()) _ = CallFilesUploadEndpointWithCleanup(t, app, "test.txt", "file", "fine-tune", 5, option) resp, err := CallListFilesEndpoint(t, app, "fine-tune") @@ -131,6 +141,7 @@ func TestUploadFileExceedSizeLimit(t *testing.T) { } }) t.Run("ListFilesEndpoint with invalid query parameter", func(t *testing.T) { + t.Cleanup(tearDown()) resp, err := CallListFilesEndpoint(t, app, "not-so-fine-tune") assert.NoError(t, err) assert.Equal(t, 200, resp.StatusCode) @@ -142,6 +153,7 @@ func TestUploadFileExceedSizeLimit(t *testing.T) { } }) t.Run("GetFilesContentsEndpoint get file content", func(t *testing.T) { + t.Cleanup(tearDown()) req := httptest.NewRequest("GET", "/files", nil) resp, _ := app.Test(req) assert.Equal(t, 200, resp.StatusCode) @@ -175,8 +187,10 @@ func CallFilesContentEndpoint(t *testing.T, app *fiber.App, fileId string) (*htt } func CallFilesUploadEndpoint(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) (*http.Response, error) { + testName := strings.Split(t.Name(), "/")[1] + // Create a file that exceeds the limit - file := createTestFile(t, fileName, fileSize, appConfig) + file := createTestFile(t, testName+"-"+fileName, fileSize, appConfig) // Creating a new HTTP Request body, writer := newMultipartFile(file.Name(), tag, purpose) @@ -188,7 +202,8 @@ func CallFilesUploadEndpoint(t *testing.T, app *fiber.App, fileName, tag, purpos func CallFilesUploadEndpointWithCleanup(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) File { // Create a file that exceeds the limit - file := createTestFile(t, fileName, fileSize, appConfig) + testName := strings.Split(t.Name(), "/")[1] + file := createTestFile(t, testName+"-"+fileName, fileSize, appConfig) // Creating a new HTTP Request body, writer := newMultipartFile(file.Name(), tag, purpose) @@ -199,11 +214,12 @@ func CallFilesUploadEndpointWithCleanup(t *testing.T, app *fiber.App, fileName, assert.NoError(t, err) f := responseToFile(t, resp) - id := f.ID - t.Cleanup(func() { - _, err := CallFilesDeleteEndpoint(t, app, id) - assert.NoError(t, err) - }) + //id := f.ID + //t.Cleanup(func() { + // _, err := CallFilesDeleteEndpoint(t, app, id) + // assert.NoError(t, err) + // assert.Empty(t, UploadedFiles) + //}) return f @@ -240,7 +256,8 @@ func createTestFile(t *testing.T, name string, sizeMB int, option *config.Applic t.Fatalf("Error MKDIR: %v", err) } - file, _ := os.Create(name) + file, err := os.Create(name) + assert.NoError(t, err) file.WriteString(strings.Repeat("a", sizeMB*1024*1024)) // sizeMB MB File t.Cleanup(func() { @@ -280,7 +297,7 @@ func responseToListFile(t *testing.T, resp *http.Response) ListFiles { err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&listFiles) if err != nil { - fmt.Printf("Failed to decode response: %s", err) + log.Error().Msgf("Failed to decode response: %s", err) } return listFiles diff --git a/main.go b/main.go index 400dcb57..651dd1c2 100644 --- a/main.go +++ b/main.go @@ -149,6 +149,12 @@ func main() { EnvVars: []string{"UPLOAD_PATH"}, Value: "/tmp/localai/upload", }, + &cli.StringFlag{ + Name: "config-path", + Usage: "Path to store uploads from files api", + EnvVars: []string{"CONFIG_PATH"}, + Value: "/tmp/localai/config", + }, &cli.StringFlag{ Name: "backend-assets-path", Usage: "Path used 
to extract libraries that are required by some of the backends in runtime.", @@ -241,6 +247,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit config.WithImageDir(ctx.String("image-path")), config.WithAudioDir(ctx.String("audio-path")), config.WithUploadDir(ctx.String("upload-path")), + config.WithConfigsDir(ctx.String("config-path")), config.WithF16(ctx.Bool("f16")), config.WithStringGalleries(ctx.String("galleries")), config.WithModelLibraryURL(ctx.String("remote-library")),
diff --git a/pkg/utils/config.go b/pkg/utils/config.go new file mode 100644 index 00000000..a9167ed3 --- /dev/null +++ b/pkg/utils/config.go @@ -0,0 +1,41 @@ +package utils + +import ( + "encoding/json" + "github.com/rs/zerolog/log" + "os" + "path/filepath" +) + +func SaveConfig(filePath, fileName string, obj any) { + file, err := json.MarshalIndent(obj, "", " ") + if err != nil { + log.Error().Msgf("Failed to JSON marshal the configuration: %s", err) + } + + absolutePath := filepath.Join(filePath, fileName) + err = os.WriteFile(absolutePath, file, 0644) + if err != nil { + log.Error().Msgf("Failed to save configuration file to %s: %s", absolutePath, err) + } +} + +func LoadConfig(filePath, fileName string, obj interface{}) { + uploadFilePath := filepath.Join(filePath, fileName) + + _, err := os.Stat(uploadFilePath) + if os.IsNotExist(err) { + log.Debug().Msgf("No configuration file found at %s", uploadFilePath) + return + } + + file, err := os.ReadFile(uploadFilePath) + if err != nil { + log.Error().Msgf("Failed to read file: %s", err) + } else { + err = json.Unmarshal(file, &obj) + if err != nil { + log.Error().Msgf("Failed to JSON unmarshal the file %s: %v", uploadFilePath, err) + } + } +}
From 607586e0b7b26a4d4c5cf5e4830ac60eb2520540 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 26 Mar 2024 22:56:02 +0100 Subject: [PATCH 0191/2895] fix: downgrade torch (#1902) Signed-off-by: Ettore Di Giacinto --- .../python/common-env/transformers/transformers-nvidia.yml | 4 ++-- backend/python/common-env/transformers/transformers.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/backend/python/common-env/transformers/transformers-nvidia.yml b/backend/python/common-env/transformers/transformers-nvidia.yml index 55361234..e8d8155b 100644 --- a/backend/python/common-env/transformers/transformers-nvidia.yml +++ b/backend/python/common-env/transformers/transformers-nvidia.yml @@ -89,8 +89,8 @@ dependencies: - six==1.16.0 - sympy==1.12 - tokenizers - - torch==2.2.1 - - torchvision==0.17.1 + - torch==2.1.2 + - torchvision==0.16.2 - torchaudio==2.1.2 - tqdm==4.66.1 - triton==2.1.0
diff --git a/backend/python/common-env/transformers/transformers.yml b/backend/python/common-env/transformers/transformers.yml index bdf8c36f..be378f67 100644 --- a/backend/python/common-env/transformers/transformers.yml +++ b/backend/python/common-env/transformers/transformers.yml @@ -76,8 +76,8 @@ dependencies: - six==1.16.0 - sympy==1.12 - tokenizers - - torch==2.2.1 - - torchvision==0.17.1 + - torch==2.1.2 + - torchvision==0.16.2 - torchaudio==2.1.2 - tqdm==4.66.1 - triton==2.1.0
From d3c283ac19e76ac6c87f6d5c9aa04fb9b43f7371 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 26 Mar 2024 22:56:42 +0100 Subject: [PATCH 0192/2895] :arrow_up: Update docs version mudler/LocalAI (#1903) Signed-off-by: GitHub Co-authored-by: mudler --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git
a/docs/data/version.json b/docs/data/version.json index 20ca21c5..b6372479 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.10.1" + "version": "v2.11.0" } From b500ceaf735b1678516774bb26a0ddae406e2c23 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 27 Mar 2024 00:21:54 +0100 Subject: [PATCH 0193/2895] :arrow_up: Update ggerganov/llama.cpp (#1904) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 518287da..bd07eac7 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=b06c16ef9f81d84da520232c125d4d8a1d273736 +CPPLLAMA_VERSION?=557410b8f06380560155ac7fcb8316d71ddc9837 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From e7cbe32601bed0748bd163f91dbf51e3e3704c7e Mon Sep 17 00:00:00 2001 From: fakezeta Date: Wed, 27 Mar 2024 00:31:43 +0100 Subject: [PATCH 0194/2895] feat: Openvino runtime for transformer backend and streaming support for Openvino and CUDA (#1892) * fixes #1775 and #1774 Add BitsAndBytes Quantization and fixes embedding on CUDA devices * Manage 4bit and 8 bit quantization Manage different BitsAndBytes options with the quantization: parameter in yaml * fix compilation errors on non CUDA environment * OpenVINO draft First draft of OpenVINO integration in transformer backend * first working implementation * Streaming working * Small fix for regression on CUDA and XPU * use pip version of optimum[openvino] * Update backend/python/transformers/transformers_server.py Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- .../common-env/transformers/transformers.yml | 7 +- .../transformers/transformers_server.py | 101 +++++++++++++++--- 2 files changed, 90 insertions(+), 18 deletions(-) diff --git a/backend/python/common-env/transformers/transformers.yml b/backend/python/common-env/transformers/transformers.yml index be378f67..3b3b8fe7 100644 --- a/backend/python/common-env/transformers/transformers.yml +++ b/backend/python/common-env/transformers/transformers.yml @@ -34,6 +34,7 @@ dependencies: - boto3==1.28.61 - botocore==1.31.61 - certifi==2023.7.22 + - coloredlogs==15.0.1 - TTS==0.22.0 - charset-normalizer==3.3.0 - datasets==2.14.5 @@ -48,6 +49,7 @@ dependencies: - funcy==2.0 - grpcio==1.59.0 - huggingface-hub + - humanfriendly==10.0 - idna==3.4 - jinja2==3.1.2 - jmespath==1.0.1 @@ -57,7 +59,10 @@ dependencies: - multiprocess==0.70.15 - networkx - numpy==1.26.0 - - optimum==1.17.1 + - onnx==1.15.0 + - openvino==2024.0.0 + - openvino-telemetry==2023.2.1 + - optimum[openvino]==1.17.1 - packaging==23.2 - pandas - peft==0.5.0 diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index 264e7fad..a8702021 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -8,6 +8,7 @@ import argparse import signal import sys import os +from threading import Thread import time import backend_pb2 @@ -17,13 +18,16 @@ import grpc import torch import torch.cuda + XPU=os.environ.get("XPU", "0") == "1" if XPU: import intel_extension_for_pytorch as ipex from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM - from transformers import 
AutoTokenizer, AutoModel, set_seed + from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer + from optimum.intel.openvino import OVModelForCausalLM + from openvino.runtime import Core else: - from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig + from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig, TextIteratorStreamer _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -81,6 +85,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): compute=torch.bfloat16 self.CUDA = request.CUDA + self.OV=False device_map="cpu" @@ -105,23 +110,55 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): bnb_4bit_compute_dtype = None, load_in_8bit=True, ) - - + try: if request.Type == "AutoModelForCausalLM": if XPU: - if quantization == "xpu_4bit": + device_map="xpu" + compute=torch.float16 + if request.Quantization == "xpu_4bit": xpu_4bit = True - self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode, - device_map="xpu", load_in_4bit=xpu_4bit) + xpu_8bit = False + elif request.Quantization == "xpu_8bit": + xpu_4bit = False + xpu_8bit = True + else: + xpu_4bit = False + xpu_8bit = False + self.model = AutoModelForCausalLM.from_pretrained(model_name, + trust_remote_code=request.TrustRemoteCode, + use_safetensors=True, + device_map=device_map, + load_in_4bit=xpu_4bit, + load_in_8bit=xpu_8bit, + torch_dtype=compute) else: - self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode, use_safetensors=True, quantization_config=quantization, device_map=device_map, torch_dtype=compute) + self.model = AutoModelForCausalLM.from_pretrained(model_name, + trust_remote_code=request.TrustRemoteCode, + use_safetensors=True, + quantization_config=quantization, + device_map=device_map, + torch_dtype=compute) + elif request.Type == "OVModelForCausalLM": + if "GPU" in Core().available_devices: + device_map="GPU" + else: + device_map="CPU" + self.model = OVModelForCausalLM.from_pretrained(model_name, + compile=True, + device=device_map) + self.OV = True else: - self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode, use_safetensors=True, quantization_config=quantization, device_map=device_map, torch_dtype=compute) + self.model = AutoModel.from_pretrained(model_name, + trust_remote_code=request.TrustRemoteCode, + use_safetensors=True, + quantization_config=quantization, + device_map=device_map, + torch_dtype=compute) self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True) self.XPU = False - if XPU: + if XPU and self.OV == False: self.XPU = True try: print("Optimizing model", model_name, "to XPU.", file=sys.stderr) @@ -130,6 +167,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): print("Not using XPU:", err, file=sys.stderr) except Exception as err: + print("Error:", err, file=sys.stderr) return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") # Implement your logic here for the LoadModel service # Replace this with your desired response @@ -167,7 +205,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): print("Embeddings:", sentence_embeddings, file=sys.stderr) return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings[0]) - def Predict(self, request, context): + def Predict(self, request, context, streaming=False): """ Generates text based on the given prompt and sampling parameters. 
@@ -186,15 +224,42 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if request.Tokens > 0: max_tokens = request.Tokens - inputs = self.tokenizer(request.Prompt, return_tensors="pt").input_ids + inputs = self.tokenizer(request.Prompt, return_tensors="pt") if self.CUDA: inputs = inputs.to("cuda") - if XPU: + if XPU and self.OV == False: inputs = inputs.to("xpu") + streaming = False - outputs = self.model.generate(inputs,max_new_tokens=max_tokens, temperature=request.Temperature, top_p=request.TopP, do_sample=True, pad_token_id=self.tokenizer.eos_token_id) - generated_text = self.tokenizer.batch_decode(outputs[:, inputs.shape[1]:], skip_special_tokens=True)[0] - + if streaming: + streamer=TextIteratorStreamer(self.tokenizer, + skip_prompt=True, + skip_special_tokens=True) + config=dict(inputs, + max_new_tokens=max_tokens, + temperature=request.Temperature, + top_p=request.TopP, + top_k=request.TopK, + do_sample=True, + attention_mask=inputs["attention_mask"], + eos_token_id=self.tokenizer.eos_token_id, + pad_token_id=self.tokenizer.eos_token_id, + streamer=streamer) + thread=Thread(target=self.model.generate, kwargs=config) + thread.start() + generated_text = "" + for new_text in streamer: + generated_text += new_text + yield backend_pb2.Reply(message=bytes(new_text, encoding='utf-8')) + else: + outputs = self.model.generate(inputs["input_ids"], + max_new_tokens=max_tokens, + temperature=request.Temperature, + top_p=request.TopP, + top_k=request.TopK, + do_sample=True, + pad_token=self.tokenizer.eos_token_id) + generated_text = self.tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0] return backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8')) def PredictStream(self, request, context): @@ -208,7 +273,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): Returns: backend_pb2.Result: The predict stream result. 
""" - yield self.Predict(request, context) + iterations = self.Predict(request, context, streaming=True) + for iteration in iterations: + yield iteration def serve(address): From 8210ffcb6c721ed4931d3f4b0bf52e787c52d7d0 Mon Sep 17 00:00:00 2001 From: fakezeta Date: Wed, 27 Mar 2024 17:50:35 +0100 Subject: [PATCH 0195/2895] feat: Token Stream support for Transformer, fix: missing package for OpenVINO (#1908) * Streaming working * Small fix for regression on CUDA and XPU * use pip version of optimum[openvino] * Update backend/python/transformers/transformers_server.py Signed-off-by: Ettore Di Giacinto * Token streaming support fix optimum[openvino] package in install.sh * Token Streaming support --------- Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- .../python/common-env/transformers/install.sh | 2 +- .../transformers/transformers_server.py | 118 +++++++++++------- 2 files changed, 72 insertions(+), 48 deletions(-) diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh index e268fcc8..8502adde 100644 --- a/backend/python/common-env/transformers/install.sh +++ b/backend/python/common-env/transformers/install.sh @@ -25,7 +25,7 @@ if [ -d "/opt/intel" ]; then # Intel GPU: If the directory exists, we assume we are using the intel image # (no conda env) # https://github.com/intel/intel-extension-for-pytorch/issues/538 - pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed + pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed optimum[openvino] fi if [ "$PIP_CACHE_PURGE" = true ] ; then diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index a8702021..04324d9b 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -9,6 +9,7 @@ import signal import sys import os from threading import Thread +import asyncio import time import backend_pb2 @@ -205,17 +206,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): print("Embeddings:", sentence_embeddings, file=sys.stderr) return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings[0]) - def Predict(self, request, context, streaming=False): - """ - Generates text based on the given prompt and sampling parameters. - - Args: - request: The predict request. - context: The gRPC context. - - Returns: - backend_pb2.Reply: The predict result. 
- """ + async def _predict(self, request, context, streaming=False): set_seed(request.Seed) if request.TopP == 0: request.TopP = 0.9 @@ -248,21 +239,54 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): thread=Thread(target=self.model.generate, kwargs=config) thread.start() generated_text = "" - for new_text in streamer: - generated_text += new_text - yield backend_pb2.Reply(message=bytes(new_text, encoding='utf-8')) + try: + for new_text in streamer: + generated_text += new_text + yield backend_pb2.Reply(message=bytes(new_text, encoding='utf-8')) + finally: + thread.join() else: - outputs = self.model.generate(inputs["input_ids"], - max_new_tokens=max_tokens, - temperature=request.Temperature, - top_p=request.TopP, - top_k=request.TopK, - do_sample=True, - pad_token=self.tokenizer.eos_token_id) + if XPU and self.OV == False: + outputs = self.model.generate(inputs["input_ids"], + max_new_tokens=max_tokens, + temperature=request.Temperature, + top_p=request.TopP, + top_k=request.TopK, + do_sample=True, + pad_token=self.tokenizer.eos_token_id) + else: + outputs = self.model.generate(inputs["input_ids"], + max_new_tokens=max_tokens, + temperature=request.Temperature, + top_p=request.TopP, + top_k=request.TopK, + do_sample=True, + attention_mask=inputs["attention_mask"], + eos_token_id=self.tokenizer.eos_token_id, + pad_token_id=self.tokenizer.eos_token_id) generated_text = self.tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0] - return backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8')) - def PredictStream(self, request, context): + if streaming: + return + + yield backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8')) + + async def Predict(self, request, context): + """ + Generates text based on the given prompt and sampling parameters. + + Args: + request: The predict request. + context: The gRPC context. + + Returns: + backend_pb2.Reply: The predict result. + """ + gen = self._predict(request, context, streaming=False) + res = await gen.__anext__() + return res + + async def PredictStream(self, request, context): """ Generates text based on the given prompt and sampling parameters, and streams the results. @@ -273,33 +297,33 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): Returns: backend_pb2.Result: The predict stream result. """ - iterations = self.Predict(request, context, streaming=True) - for iteration in iterations: - yield iteration + iterations = self._predict(request, context, streaming=True) + try: + async for iteration in iterations: + yield iteration + finally: + await iterations.aclose() - -def serve(address): - server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) +async def serve(address): + # Start asyncio gRPC server + server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) + # Add the servicer to the server backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) + # Bind the server to the address server.add_insecure_port(address) - server.start() + + # Gracefully shutdown the server on SIGTERM or SIGINT + loop = asyncio.get_event_loop() + for sig in (signal.SIGINT, signal.SIGTERM): + loop.add_signal_handler( + sig, lambda: asyncio.ensure_future(server.stop(5)) + ) + + # Start the server + await server.start() print("Server started. Listening on: " + address, file=sys.stderr) - - # Define the signal handler function - def signal_handler(sig, frame): - print("Received termination signal. 
Shutting down...") - server.stop(0) - sys.exit(0) - - # Set the signal handlers for SIGINT and SIGTERM - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) - except KeyboardInterrupt: - server.stop(0) + # Wait for the server to be terminated + await server.wait_for_termination() if __name__ == "__main__": parser = argparse.ArgumentParser(description="Run the gRPC server.") @@ -308,4 +332,4 @@ if __name__ == "__main__": ) args = parser.parse_args() - serve(args.addr) + asyncio.run(serve(args.addr)) \ No newline at end of file From 93f0b7ae03ec0a92375616fda62ca3a0ebb075e9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 27 Mar 2024 18:17:12 +0100 Subject: [PATCH 0196/2895] update hot topics Signed-off-by: Ettore Di Giacinto --- README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 8cf15d5a..5f1bfe0b 100644 --- a/README.md +++ b/README.md @@ -50,14 +50,12 @@ [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) +- Openvino support: https://github.com/mudler/LocalAI/pull/1892 - Vector store: https://github.com/mudler/LocalAI/pull/1795 - All-in-one container image: https://github.com/mudler/LocalAI/issues/1855 -- Parallel function calling: https://github.com/mudler/LocalAI/pull/1726 +- Parallel function calling: https://github.com/mudler/LocalAI/pull/1726 / Tools API support: https://github.com/mudler/LocalAI/pull/1715 - Upload file API: https://github.com/mudler/LocalAI/pull/1703 -- Tools API support: https://github.com/mudler/LocalAI/pull/1715 -- LLaVa 1.6: https://github.com/mudler/LocalAI/pull/1714 -- ROCm container images: https://github.com/mudler/LocalAI/pull/1595 -- Intel GPU support (sycl, transformers, diffusers): https://github.com/mudler/LocalAI/issues/1653 +- ROCm container images: https://github.com/mudler/LocalAI/pull/1595 / Intel GPU support (sycl, transformers, diffusers): https://github.com/mudler/LocalAI/issues/1653 - Mamba support: https://github.com/mudler/LocalAI/pull/1589 - Start and share models with config file: https://github.com/mudler/LocalAI/pull/1522 - 🐸 Coqui: https://github.com/mudler/LocalAI/pull/1489 From 66ee4afb952d085f469ce5e47f803746d010c285 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 27 Mar 2024 21:10:58 +0100 Subject: [PATCH 0197/2895] feat(welcome): add simple welcome page (#1912) * feat(welcome): add simple welcome page * feat(api): add 404 handling --- core/config/application_config.go | 5 +++ core/http/api.go | 42 +++++++++++++++++++- core/http/views/404.html | 33 ++++++++++++++++ core/http/views/index.html | 58 ++++++++++++++++++++++++++++ core/http/views/partials/footer.html | 4 ++ core/http/views/partials/head.html | 13 +++++++ core/http/views/partials/navbar.html | 15 +++++++ go.mod | 3 ++ go.sum | 6 +++ main.go | 11 ++++++ 10 files changed, 189 insertions(+), 1 deletion(-) create mode 100644 core/http/views/404.html create mode 100644 core/http/views/index.html create mode 100644 core/http/views/partials/footer.html create mode 100644 core/http/views/partials/head.html create mode 100644 core/http/views/partials/navbar.html diff --git a/core/config/application_config.go b/core/config/application_config.go index c2d4e13a..49b35f97 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -15,6 +15,7 @@ type ApplicationConfig struct { ConfigFile string ModelPath string UploadLimitMB, Threads, 
ContextSize int + DisableWelcomePage bool F16 bool Debug, DisableMessage bool ImageDir string @@ -105,6 +106,10 @@ var EnableWatchDogBusyCheck = func(o *ApplicationConfig) { o.WatchDogBusy = true } +var DisableWelcomePage = func(o *ApplicationConfig) { + o.DisableWelcomePage = true +} + func SetWatchDogBusyTimeout(t time.Duration) AppOption { return func(o *ApplicationConfig) { o.WatchDogBusyTimeout = t diff --git a/core/http/api.go b/core/http/api.go index de0a4939..365407d8 100644 --- a/core/http/api.go +++ b/core/http/api.go @@ -1,12 +1,15 @@ package http import ( + "embed" "encoding/json" "errors" - "github.com/go-skynet/LocalAI/pkg/utils" + "net/http" "os" "strings" + "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs" "github.com/go-skynet/LocalAI/core/http/endpoints/localai" "github.com/go-skynet/LocalAI/core/http/endpoints/openai" @@ -21,6 +24,7 @@ import ( "github.com/gofiber/fiber/v2/middleware/cors" "github.com/gofiber/fiber/v2/middleware/logger" "github.com/gofiber/fiber/v2/middleware/recover" + "github.com/gofiber/template/html/v2" ) func readAuthHeader(c *fiber.Ctx) string { @@ -41,9 +45,14 @@ func readAuthHeader(c *fiber.Ctx) string { return authHeader } +//go:embed views/* +var viewsfs embed.FS + func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) { + engine := html.NewFileSystem(http.FS(viewsfs), ".html") // Return errors as JSON responses app := fiber.New(fiber.Config{ + Views: engine, BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB DisableStartupMessage: appConfig.DisableMessage, // Override default error handler @@ -168,6 +177,21 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles) + if !appConfig.DisableWelcomePage { + models, _ := ml.ListModels() + backendConfigs := cl.GetAllBackendConfigs() + app.Get("/", auth, func(c *fiber.Ctx) error { + // Render index + return c.Render("views/index", fiber.Map{ + "Title": "LocalAI API - " + internal.PrintableVersion(), + "Version": internal.PrintableVersion(), + "Models": models, + "ModelsConfig": backendConfigs, + "ApplicationConfig": appConfig, + }) + }) + } + modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint()) app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint()) @@ -275,5 +299,21 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi app.Get("/metrics", localai.LocalAIMetricsEndpoint()) + // Define a custom 404 handler + app.Use(func(c *fiber.Ctx) error { + + // Check if the request accepts JSON + if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 { + // The client expects a JSON response + c.Status(fiber.StatusNotFound).JSON(fiber.Map{ + "error": "Resource not found", + }) + } else { + // The client expects an HTML response + c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{}) + } + return nil + }) + return app, nil } diff --git a/core/http/views/404.html b/core/http/views/404.html new file mode 100644 index 00000000..359d8505 --- /dev/null +++ 
b/core/http/views/404.html @@ -0,0 +1,33 @@ [The template's HTML markup was lost in extraction; only the directives and visible text survive. The 404 page renders {{template "views/partials/head" .}} and {{template "views/partials/navbar" .}}, a "Welcome to your LocalAI instance!" heading, the tagline "The FOSS alternative to OpenAI, Claude, ...", a "Documentation" link, a "Nothing found!" message, and {{template "views/partials/footer" .}}.]
diff --git a/core/http/views/index.html b/core/http/views/index.html new file mode 100644 index 00000000..ad14f667 --- /dev/null +++ b/core/http/views/index.html @@ -0,0 +1,58 @@ [Markup lost in extraction. The welcome page sets its title from {{.Title}}, renders the navbar, the same heading, tagline, and "Documentation" link as the 404 page, then an "Installed models" section ("We have {{len .ModelsConfig}} pre-loaded models available.") that iterates {{ range .ModelsConfig }} and prints each model's {{.Name}} with its optional {{.Usage}} and {{.Description}}, closing with the footer partial.]
diff --git a/core/http/views/partials/footer.html b/core/http/views/partials/footer.html new file mode 100644 index 00000000..7fc7e504 --- /dev/null +++ b/core/http/views/partials/footer.html @@ -0,0 +1,4 @@ [Markup lost in extraction.] \ No newline at end of file
diff --git a/core/http/views/partials/head.html b/core/http/views/partials/head.html new file mode 100644 index 00000000..59cdea33 --- /dev/null +++ b/core/http/views/partials/head.html @@ -0,0 +1,13 @@ [Markup lost in extraction; the head partial sets the page title from {{.Title}}.] \ No newline at end of file
diff --git a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html new file mode 100644 index 00000000..2717f974 --- /dev/null +++ b/core/http/views/partials/navbar.html @@ -0,0 +1,15 @@ [Markup lost in extraction.] \ No newline at end of file
diff --git a/go.mod b/go.mod index 8a43df1d..79068904 100644 --- a/go.mod +++ b/go.mod @@ -75,6 +75,9 @@ require ( github.com/docker/go-units v0.4.0 // indirect github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect github.com/go-logr/stdr v1.2.2 // indirect + github.com/gofiber/template v1.8.3 // indirect + github.com/gofiber/template/html/v2 v2.1.1 // indirect + github.com/gofiber/utils v1.1.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/golang/snappy v0.0.2 // indirect
diff --git a/go.sum b/go.sum index bef84d57..a2c5b912 100644 --- a/go.sum +++ b/go.sum @@ -96,6 +96,12 @@ github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gofiber/fiber/v2 v2.50.0 h1:ia0JaB+uw3GpNSCR5nvC5dsaxXjRU5OEu36aytx+zGw= github.com/gofiber/fiber/v2 v2.50.0/go.mod h1:21eytvay9Is7S6z+OgPi7c7n4++tnClWmhpimVHMimw= +github.com/gofiber/template v1.8.3 h1:hzHdvMwMo/T2kouz2pPCA0zGiLCeMnoGsQZBTSYgZxc= +github.com/gofiber/template v1.8.3/go.mod h1:bs/2n0pSNPOkRa5VJ8zTIvedcI/lEYxzV3+YPXdBvq8= +github.com/gofiber/template/html/v2 v2.1.1 h1:QEy3O3EBkvwDthy5bXVGUseOyO6ldJoiDxlF4+MJiV8= +github.com/gofiber/template/html/v2 v2.1.1/go.mod h1:2G0GHHOUx70C1LDncoBpe4T6maQbNa4x1CVNFW0wju0= +github.com/gofiber/utils v1.1.0 h1:vdEBpn7AzIUJRhe+CiTOJdUcTg4Q9RK+pEa0KPbLdrM= +github.com/gofiber/utils v1.1.0/go.mod h1:poZpsnhBykfnY1Mc0KeEa6mSHrS3dV0+oBWyeQmb2e0= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
diff --git a/main.go b/main.go index 651dd1c2..f000aa71 100644 --- a/main.go +++ b/main.go @@ -189,6 +189,12 @@ EnvVars: []string{"WATCHDOG_IDLE"}, Value: false, }, + &cli.BoolFlag{ + Name: "disable-welcome", + Usage: "Disable welcome pages", + EnvVars: []string{"DISABLE_WELCOME"}, + Value: false, + }, &cli.BoolFlag{ Name: "enable-watchdog-busy", Usage: "Enable watchdog for stopping busy backends that exceed a defined threshold.", @@ -264,6 +270,11 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit idleWatchDog := ctx.Bool("enable-watchdog-idle") busyWatchDog := ctx.Bool("enable-watchdog-busy") + + if ctx.Bool("disable-welcome") { + opts = append(opts, config.DisableWelcomePage) + } + if idleWatchDog || busyWatchDog { opts = append(opts, config.EnableWatchDog) if idleWatchDog {
From 7ef5f3b473fdbbcf8d89cc942190a03a92c064c6 Mon Sep 17 00:00:00 2001 From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com> Date: Wed, 27 Mar 2024 21:12:04
+0100 Subject: [PATCH 0198/2895] :arrow_up: Update M0Rf30/go-tiny-dream (#1911) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bd07eac7..7334ed4f 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759 STABLEDIFFUSION_VERSION?=362df9da29f882dbf09ade61972d16a1f53c3485 # tinydream version -TINYDREAM_VERSION?=772a9c0d9aaf768290e63cca3c904fe69faf677a +TINYDREAM_VERSION?=22a12a4bc0ac5455856f28f3b771331a551a4293 export BUILD_TYPE?= export STABLE_BUILD_TYPE?=$(BUILD_TYPE) From 0c0efc871c9673efa733eccc880b65f987006051 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Wed, 27 Mar 2024 15:12:19 -0500 Subject: [PATCH 0199/2895] fix(build): better CI logging and correct some build failure modes in Makefile (#1899) * feat: group make output by target when running parallelized builds in CI * fix: quote GO_TAGS in makefile to fix handling of whitespace in value * fix: set CPATH to find opencv2 in it's commonly installed location * fix: add missing go mod dropreplace for go-llama.cpp * chore: remove opencv symlink from github workflows --- .github/workflows/image-pr.yml | 4 ++-- .github/workflows/image.yml | 4 ++-- .github/workflows/release.yaml | 1 - .github/workflows/test-extra.yml | 16 ++++++++-------- .github/workflows/test.yml | 2 +- Makefile | 7 ++++--- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 17456617..aa59188c 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -22,7 +22,7 @@ jobs: platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} - makeflags: "-j3" + makeflags: "--jobs=3 --output-sync=target" secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -81,7 +81,7 @@ jobs: platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} - makeflags: "-j3" + makeflags: "--jobs=3 --output-sync=target" secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 6e93cb9a..40deb0ec 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -27,7 +27,7 @@ jobs: runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} aio: ${{ matrix.aio }} - makeflags: "-j3" + makeflags: "--jobs=3 --output-sync=target" secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -207,7 +207,7 @@ jobs: runs-on: ${{ matrix.runs-on }} aio: ${{ matrix.aio }} base-image: ${{ matrix.base-image }} - makeflags: "-j3" + makeflags: "--jobs=3 --output-sync=target" secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index aa0a270b..6ac816ee 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -102,7 +102,6 @@ jobs: - name: Dependencies run: | sudo apt-get install -y --no-install-recommends libopencv-dev - sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 - name: Build stablediffusion run: | make backend-assets/grpc/stablediffusion diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index 68da2c56..5f61835d 100644 --- 
a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -33,7 +33,7 @@ jobs: sudo apt-get update && \ sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch - sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 + sudo apt-get install -y libopencv-dev sudo rm -rfv /usr/bin/conda || true @@ -62,7 +62,7 @@ jobs: sudo apt-get update && \ sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch - sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 + sudo apt-get install -y libopencv-dev sudo rm -rfv /usr/bin/conda || true @@ -91,7 +91,7 @@ jobs: sudo apt-get update && \ sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch - sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 + sudo apt-get install -y libopencv-dev sudo rm -rfv /usr/bin/conda || true @@ -121,7 +121,7 @@ jobs: sudo apt-get update && \ sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch - sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 + sudo apt-get install -y libopencv-dev sudo rm -rfv /usr/bin/conda || true @@ -152,7 +152,7 @@ jobs: sudo apt-get update && \ sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch - sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 + sudo apt-get install -y libopencv-dev sudo rm -rfv /usr/bin/conda || true @@ -223,7 +223,7 @@ jobs: # sudo apt-get update && \ # sudo apt-get install -y conda # sudo apt-get install -y ca-certificates cmake curl patch - # sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 + # sudo apt-get install -y libopencv-dev # sudo rm -rfv /usr/bin/conda || true @@ -255,7 +255,7 @@ jobs: # sudo apt-get update && \ # sudo apt-get install -y conda # sudo apt-get install -y ca-certificates cmake curl patch - # sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 + # sudo apt-get install -y libopencv-dev # sudo rm -rfv /usr/bin/conda || true # - name: Test vllm # run: | @@ -281,7 +281,7 @@ jobs: sudo apt-get update && \ sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch - sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 + sudo apt-get install -y libopencv-dev sudo rm -rfv /usr/bin/conda || true - name: Test vall-e-x run: | diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6d837821..203aeeca 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -75,7 +75,7 @@ jobs: sudo apt-get update && \ sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch - sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 + sudo apt-get install -y libopencv-dev sudo rm -rfv /usr/bin/conda || true PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers diff --git a/Makefile b/Makefile index 7334ed4f..da91fb2d 100644 --- a/Makefile +++ b/Makefile @@ -224,7 +224,7 @@ sources/go-stable-diffusion: cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1 
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion - $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a + CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a ## tiny-dream sources/go-tiny-dream: @@ -263,6 +263,7 @@ dropreplace: $(GOCMD) mod edit -dropreplace github.com/mudler/go-piper $(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion $(GOCMD) mod edit -dropreplace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang + $(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp prepare-sources: get-sources replace $(GOCMD) mod download @@ -531,7 +532,7 @@ backend-assets/grpc/rwkv: sources/go-rwkv sources/go-rwkv/librwkv.a backend-asse $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-stable-diffusion/ LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \ + CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc @@ -556,7 +557,7 @@ docker: docker build \ --build-arg BASE_IMAGE=$(BASE_IMAGE) \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ - --build-arg GO_TAGS=$(GO_TAGS) \ + --build-arg GO_TAGS="$(GO_TAGS)" \ --build-arg BUILD_TYPE=$(BUILD_TYPE) \ -t $(DOCKER_IMAGE) . 
From 160eb48b2b2aa74f0c30046da483cfd7cd356dc2 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 27 Mar 2024 22:47:59 +0100 Subject: [PATCH 0200/2895] Update quickstart.md --- docs/content/docs/getting-started/quickstart.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md index 33ec4cfa..94500655 100644 --- a/docs/content/docs/getting-started/quickstart.md +++ b/docs/content/docs/getting-started/quickstart.md @@ -50,8 +50,8 @@ Start the image with Docker: ```bash docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu # For Nvidia GPUs: -# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-cuda-11 -# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-cuda-12 +# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-11 +# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-12 ``` From 2266d8263c5beb8e4386de5c72a98c5d9f348e35 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 27 Mar 2024 22:48:46 +0100 Subject: [PATCH 0201/2895] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5f1bfe0b..3c5c1a52 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ For those in a hurry, here's a straightforward one-liner to launch a LocalAI AIO ```bash docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu # or, if you have an Nvidia GPU: -# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-cuda12 +# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12 ``` ## 🚀 [Features](https://localai.io/features/) From 07c4bdda7c786c382950d7d3ae5982eccfbccb9a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 27 Mar 2024 22:57:59 +0100 Subject: [PATCH 0202/2895] :arrow_up: Update ggerganov/llama.cpp (#1913) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index da91fb2d..e61fa6d6 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=557410b8f06380560155ac7fcb8316d71ddc9837 +CPPLLAMA_VERSION?=a016026a3ac16d8c9b993a3573f19b9556d67de4 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 07c49ee4b870760fad81e38de6f3be4d775532a4 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 27 Mar 2024 23:53:13 +0100 Subject: [PATCH 0203/2895] :arrow_up: Update ggerganov/whisper.cpp (#1914) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e61fa6d6..3258bbe3 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=1558ec5a16cb2b2a0bf54815df1d41f83dc3815b +WHISPER_CPP_VERSION?=2948c740a2bf43190b8e3badb6f1e147f11f96d1 # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From 23b833d1715677b0f6388f80f8fa1e0c61b64488 Mon Sep 17 00:00:00 2001 
From: Ettore Di Giacinto Date: Thu, 28 Mar 2024 12:42:37 +0100 Subject: [PATCH 0204/2895] Update run-other-models.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/getting-started/run-other-models.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/content/docs/getting-started/run-other-models.md b/docs/content/docs/getting-started/run-other-models.md index 4420550d..2b72b93d 100644 --- a/docs/content/docs/getting-started/run-other-models.md +++ b/docs/content/docs/getting-started/run-other-models.md @@ -80,7 +80,7 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start | transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 transformers-tinyllama``` | | [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 codellama-7b``` | | [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core codellama-7b-gguf``` | -| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core hermes-2-pro-mistral``` | +| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core hermes-2-pro-mistral``` | {{% /tab %}} @@ -111,7 +111,7 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start | transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 transformers-tinyllama``` | | [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 codellama-7b``` | | [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core codellama-7b-gguf``` | -| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core hermes-2-pro-mistral``` | +| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core hermes-2-pro-mistral``` | {{% /tab %}} {{< /tabs >}} @@ -123,4 +123,4 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava phi-2 ``` -{{% /alert %}} \ No newline at end of file +{{% /alert %}} From 13ccd2afef1f00f2579fb7247d52686f5d06f5db Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 28 Mar 2024 
18:16:58 +0100 Subject: [PATCH 0205/2895] docs(aio-usage): update docs to show examples (#1921) Signed-off-by: Ettore Di Giacinto --- .../docs/getting-started/quickstart.md | 204 +++++++++++++++++- docs/content/docs/reference/aio-images.md | 37 ++++-- 2 files changed, 223 insertions(+), 18 deletions(-)
diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md index 94500655..ff05afaf 100644 --- a/docs/content/docs/getting-started/quickstart.md +++ b/docs/content/docs/getting-started/quickstart.md @@ -68,8 +68,8 @@ services: healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"] interval: 1m - timeout: 120m - retries: 120 + timeout: 20m + retries: 5 ports: - 8080:8080 environment: @@ -89,8 +89,208 @@ services: For a list of all the container-images available, see [Container images]({{%relref "docs/reference/container-images" %}}). To learn more about All-in-one images instead, see [All-in-one Images]({{%relref "docs/reference/aio-images" %}}). +{{% alert icon="💡 Models caching" %}} + +The **AIO** image will download the needed models on the first run if not already present and store those in `/build/models` inside the container. The AIO models will be automatically updated with new versions of AIO images. + +You can change the directory inside the container by specifying a `MODELS_PATH` environment variable (or `--models-path`). + +If you want to use a named model or a local directory, you can mount it as a volume to `/build/models`: + +```bash +docker run -p 8080:8080 --name local-ai -ti -v $PWD/models:/build/models localai/localai:latest-aio-cpu +``` + +or associate a volume: + +```bash +docker volume create localai-models +docker run -p 8080:8080 --name local-ai -ti -v localai-models:/build/models localai/localai:latest-aio-cpu +``` + +{{% /alert %}} + +## Try it out + +LocalAI does not ship a webui by default; however, you can use third-party projects to interact with it (see also [Integrations]({{%relref "docs/integrations" %}})). You can also test out the API endpoints using `curl`. + +### Text Generation + +Creates a model response for the given chat conversation. [OpenAI documentation](https://platform.openai.com/docs/api-reference/chat/create). +
+ +```bash +curl http://localhost:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ "model": "gpt-4", "messages": [{"role": "user", "content": "How are you doing?"}], "temperature": 0.1 }' +``` + +
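+The same endpoint can be driven from code. Below is a minimal Go sketch of the request above; it assumes LocalAI is listening on localhost:8080 and that a model named `gpt-4` is configured (as in the AIO images), and the response struct is trimmed to the fields used here:
+
+```go
+package main
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"net/http"
+)
+
+type message struct {
+	Role    string `json:"role"`
+	Content string `json:"content"`
+}
+
+func main() {
+	// Build the same request body as the curl example above.
+	payload, err := json.Marshal(map[string]any{
+		"model":       "gpt-4",
+		"messages":    []message{{Role: "user", Content: "How are you doing?"}},
+		"temperature": 0.1,
+	})
+	if err != nil {
+		panic(err)
+	}
+
+	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewReader(payload))
+	if err != nil {
+		panic(err)
+	}
+	defer resp.Body.Close()
+
+	// Decode only the first choice's message content.
+	var out struct {
+		Choices []struct {
+			Message message `json:"message"`
+		} `json:"choices"`
+	}
+	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
+		panic(err)
+	}
+	if len(out.Choices) > 0 {
+		fmt.Println(out.Choices[0].Message.Content)
+	}
+}
+```
+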
+ +### GPT Vision + +Understand images. + +
+ +```bash +curl http://localhost:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-4-vision-preview", + "messages": [ + { + "role": "user", "content": [ + {"type":"text", "text": "What is in the image?"}, + { + "type": "image_url", + "image_url": { + "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + } + } + ] + } + ], + "temperature": 0.9 + }' +``` + +
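+The `image_url` field can also carry the image inline as a base64-encoded `data:` URI instead of a remote link. A short Go sketch that builds such a URI from a local file (assuming the server accepts data URIs; `photo.jpg` is an illustrative filename):
+
+```go
+package main
+
+import (
+	"encoding/base64"
+	"fmt"
+	"os"
+)
+
+func main() {
+	// Read the image and wrap it in a data URI suitable for the "url" value above.
+	raw, err := os.ReadFile("photo.jpg")
+	if err != nil {
+		panic(err)
+	}
+	dataURI := "data:image/jpeg;base64," + base64.StdEncoding.EncodeToString(raw)
+
+	fmt.Printf("data URI built (%d bytes)\n", len(dataURI))
+}
+```
+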
+ +### Function calling + +Call functions that you declare to the model as tools. + +
+ +```bash +curl http://localhost:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-4", + "messages": [ + { + "role": "user", + "content": "What is the weather like in Boston?" + } + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"] + } + }, + "required": ["location"] + } + } + } + ], + "tool_choice": "auto" + }' +``` + +
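+The reply comes back as structured JSON rather than prose: following the OpenAI tools format, the call sits under `choices[0].message.tool_calls`, and the `arguments` field is itself a JSON-encoded string that needs a second decode. A Go sketch of the decoding (the payload below is illustrative):
+
+```go
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+)
+
+func main() {
+	// Illustrative response body, trimmed to the tool-call fields.
+	raw := []byte(`{"choices":[{"message":{"tool_calls":[{"function":{"name":"get_current_weather","arguments":"{\"location\":\"Boston, MA\"}"}}]}}]}`)
+
+	var resp struct {
+		Choices []struct {
+			Message struct {
+				ToolCalls []struct {
+					Function struct {
+						Name      string `json:"name"`
+						Arguments string `json:"arguments"`
+					} `json:"function"`
+				} `json:"tool_calls"`
+			} `json:"message"`
+		} `json:"choices"`
+	}
+	if err := json.Unmarshal(raw, &resp); err != nil {
+		panic(err)
+	}
+	call := resp.Choices[0].Message.ToolCalls[0].Function
+
+	// The arguments arrive as a JSON string and need a second decode.
+	var args map[string]any
+	if err := json.Unmarshal([]byte(call.Arguments), &args); err != nil {
+		panic(err)
+	}
+	fmt.Println(call.Name, args["location"])
+}
+```
+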
+ +### Image Generation + +Creates an image given a prompt. [OpenAI documentation](https://platform.openai.com/docs/api-reference/images/create). + +
+ +```bash +curl http://localhost:8080/v1/images/generations \ + -H "Content-Type: application/json" -d '{ + "prompt": "A cute baby sea otter", + "size": "256x256" + }' +``` + +
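+
+The response follows the OpenAI images schema, with the generated file exposed under `data[0].url`. Assuming `jq` is installed, you can print the image location directly:
+
+```bash
+curl -s http://localhost:8080/v1/images/generations \
+    -H "Content-Type: application/json" -d '{
+      "prompt": "A cute baby sea otter",
+      "size": "256x256"
+    }' | jq -r '.data[0].url'
+```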
+
+### Text to Speech
+
+Generates audio from the input text. [OpenAI documentation](https://platform.openai.com/docs/api-reference/audio/createSpeech).
+
+ +```bash +curl http://localhost:8080/v1/audio/speech \ + -H "Content-Type: application/json" \ + -d '{ + "model": "tts-1", + "input": "The quick brown fox jumped over the lazy dog.", + "voice": "alloy" + }' \ + --output speech.mp3 +``` + +
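+
+The `--output` flag saves the generated audio to `speech.mp3`; you can then play the file with any media player, for example with `ffplay` from FFmpeg (if installed):
+
+```bash
+ffplay -nodisp -autoexit speech.mp3
+```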
+ + +### Audio Transcription + +Transcribes audio into the input language. [OpenAI Documentation](https://platform.openai.com/docs/api-reference/audio/createTranscription). + +
+
+First, download a sample file to transcribe:
+
+```bash
+wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
+```
+
+Then send the example audio file to the transcriptions endpoint:
+
+```bash
+curl http://localhost:8080/v1/audio/transcriptions \
+    -H "Content-Type: multipart/form-data" \
+    -F file="@$PWD/gb1.ogg" -F model="whisper-1"
+```
+
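+
+The transcription is returned as JSON following the OpenAI schema, so the recognized text can be extracted with `jq` (assuming it is installed):
+
+```bash
+curl -s http://localhost:8080/v1/audio/transcriptions \
+    -H "Content-Type: multipart/form-data" \
+    -F file="@$PWD/gb1.ogg" -F model="whisper-1" | jq -r '.text'
+```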
+ +### Embeddings Generation + +Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms. [OpenAI Embeddings](https://platform.openai.com/docs/api-reference/embeddings). + +
+ +```bash +curl http://localhost:8080/embeddings \ + -X POST -H "Content-Type: application/json" \ + -d '{ + "input": "Your text string goes here", + "model": "text-embedding-ada-002" + }' +``` + +
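+
+The response follows the OpenAI embeddings schema, with the vector under `data[0].embedding`. For instance, assuming `jq` is installed, you can check the dimensionality of the returned embedding:
+
+```bash
+curl -s http://localhost:8080/embeddings \
+    -X POST -H "Content-Type: application/json" \
+    -d '{
+      "input": "Your text string goes here",
+      "model": "text-embedding-ada-002"
+    }' | jq '.data[0].embedding | length'
+```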
+
+{{% alert icon="💡" %}}
+
+Don't use the model file as `model` in the request unless you want to handle the prompt template yourself.
+
+Use the model names as you would with OpenAI, as shown in the examples above; for instance `gpt-4-vision-preview` or `gpt-4`.
+
+{{% /alert %}}
+
 ## What's next?
 
+There is much more to explore! Run any model from Hugging Face, generate video, and clone voices with LocalAI; check out the [features]({{%relref "docs/features" %}}) section for a full overview.
+
 Explore further resources and community contributions:
 
 - [Build LocalAI and the container image]({{%relref "docs/getting-started/build" %}})
diff --git a/docs/content/docs/reference/aio-images.md b/docs/content/docs/reference/aio-images.md
index 331892e9..c2cb57ba 100644
--- a/docs/content/docs/reference/aio-images.md
+++ b/docs/content/docs/reference/aio-images.md
@@ -7,15 +7,28 @@ weight = 26
 
 All-In-One images are images that come pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and requires no configuration. Models configuration can be found [here](https://github.com/mudler/LocalAI/tree/master/aio) separated by size.
 
-What you can find configured out of the box:
+The AIO images ship models configured under the familiar OpenAI model names; however, these are actually backed by open-source models, as shown in the table below:
 
-- Image generation
-- Text generation
-- Text to audio
-- Audio transcription
-- Embeddings
-- GPT Vision
+| Category | Model name | Real model |
+| --- | --- | --- |
+| Text generation | `gpt-4` | `phi-2`(CPU) or `hermes-2-pro-mistral`(GPU) |
+| Multimodal | `gpt-4-vision-preview` | `bakllava`(CPU) or `llava-1.6-mistral`(GPU) |
+| Image generation | `stablediffusion` | `stablediffusion`(CPU) or `dreamshaper-8`(GPU) |
+| Audio transcription | `whisper-1` | `whisper` with the `whisper-base` model |
+| Text to audio | `tts-1` | the `en-us-amy-low.onnx` model with `rhasspy` |
+| Embeddings | `text-embedding-ada-002` | |
+
+## Usage
+
+Select the image (CPU or GPU) and start the container with Docker:
+
+```bash
+# CPU example
+docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
+```
+
+LocalAI will automatically download all the required models, and the API will be available at [localhost:8080](http://localhost:8080/v1/models).
+
+## Available images
 
 | Description | Quay | Docker Hub |
 | --- | --- |-----------------------------------------------|
@@ -37,12 +50,3 @@ The AIO Images are inheriting the same environment variables as the base images
 | `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/getting-started/run-other-models" %}})) |
 
-## Example
-
-Start the image with Docker:
-
-```bash
-docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
-```
-
-LocalAI will automatically download all the required models, and will be available at [localhost:8080](http://localhost:8080/v1/models).
From 8477e8fac39641fcb6adda9ae02392ac97bfd4e4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 28 Mar 2024 18:28:30 +0100 Subject: [PATCH 0206/2895] Update quickstart.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/getting-started/quickstart.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md index ff05afaf..716fe154 100644 --- a/docs/content/docs/getting-started/quickstart.md +++ b/docs/content/docs/getting-started/quickstart.md @@ -89,9 +89,9 @@ services: For a list of all the container-images available, see [Container images]({{%relref "docs/reference/container-images" %}}). To learn more about All-in-one images instead, see [All-in-one Images]({{%relref "docs/reference/aio-images" %}}). -{{% alert icon="💡 Models caching" %}} +{{% alert icon="💡" %}} -The **AIO** image will download the needed models on the first run if not already present and store those in `/build/models` inside the container. The AIO models will be automatically updated with new versions of AIO images. +**Models caching**: The **AIO** image will download the needed models on the first run if not already present and store those in `/build/models` inside the container. The AIO models will be automatically updated with new versions of AIO images. You can change the directory inside the container by specifying a `MODELS_PATH` environment variable (or `--models-path`). @@ -104,7 +104,7 @@ docker run -p 8080:8080 --name local-ai -ti -v $PWD/models:/build/models localai or associate a volume: ```bash -docker create volume localai-models +docker volume create localai-models docker run -p 8080:8080 --name local-ai -ti -v localai-models:/build/models localai/localai:latest-aio-cpu ``` @@ -298,4 +298,4 @@ Explore further resources and community contributions: - [Run other models]({{%relref "docs/getting-started/run-other-models" %}}) - [Container images]({{%relref "docs/reference/container-images" %}}) - [All-in-one Images]({{%relref "docs/reference/aio-images" %}}) -- [Examples](https://github.com/mudler/LocalAI/tree/master/examples#examples) \ No newline at end of file +- [Examples](https://github.com/mudler/LocalAI/tree/master/examples#examples) From 4e79294f978fc411508a37fd650dfd0a78a9df26 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 28 Mar 2024 19:52:40 +0100 Subject: [PATCH 0207/2895] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 3c5c1a52..76a5fc08 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,7 @@ [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) +- Landing page: https://github.com/mudler/LocalAI/pull/1922 - Openvino support: https://github.com/mudler/LocalAI/pull/1892 - Vector store: https://github.com/mudler/LocalAI/pull/1795 - All-in-one container image: https://github.com/mudler/LocalAI/issues/1855 From bf65ed6eb84d5b856c412d607827dc057c4585d4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 28 Mar 2024 21:52:52 +0100 Subject: [PATCH 0208/2895] feat(webui): add partials, show backends associated to models (#1922) * feat(webui): add partials, show backends associated to models * fix(auth): put assistant and backend under auth --- core/http/api.go | 88 ++++++++++++++------------------------ core/http/render.go | 80 ++++++++++++++++++++++++++++++++++ core/http/views/index.html | 56 +++++++++++------------- 
go.mod | 10 +++-- go.sum | 34 ++++++--------- 5 files changed, 155 insertions(+), 113 deletions(-) create mode 100644 core/http/render.go diff --git a/core/http/api.go b/core/http/api.go index 365407d8..24216737 100644 --- a/core/http/api.go +++ b/core/http/api.go @@ -1,10 +1,8 @@ package http import ( - "embed" "encoding/json" "errors" - "net/http" "os" "strings" @@ -24,7 +22,6 @@ import ( "github.com/gofiber/fiber/v2/middleware/cors" "github.com/gofiber/fiber/v2/middleware/logger" "github.com/gofiber/fiber/v2/middleware/recover" - "github.com/gofiber/template/html/v2" ) func readAuthHeader(c *fiber.Ctx) string { @@ -45,14 +42,10 @@ func readAuthHeader(c *fiber.Ctx) string { return authHeader } -//go:embed views/* -var viewsfs embed.FS - func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) { - engine := html.NewFileSystem(http.FS(viewsfs), ".html") // Return errors as JSON responses app := fiber.New(fiber.Config{ - Views: engine, + Views: renderEngine(), BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB DisableStartupMessage: appConfig.DisableMessage, // Override default error handler @@ -177,20 +170,13 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles) - if !appConfig.DisableWelcomePage { - models, _ := ml.ListModels() - backendConfigs := cl.GetAllBackendConfigs() - app.Get("/", auth, func(c *fiber.Ctx) error { - // Render index - return c.Render("views/index", fiber.Map{ - "Title": "LocalAI API - " + internal.PrintableVersion(), - "Version": internal.PrintableVersion(), - "Models": models, - "ModelsConfig": backendConfigs, - "ApplicationConfig": appConfig, - }) - }) - } + welcomeRoute( + app, + cl, + ml, + appConfig, + auth, + ) modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint()) @@ -224,24 +210,24 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) // assistant - app.Get("/v1/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig)) - app.Get("/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig)) - app.Post("/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig)) - app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig)) - app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig)) - app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) - 
app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) - app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) - app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig)) + app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig)) + app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig)) + app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig)) + app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig)) + app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig)) + app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig)) + app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) + app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) + app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) + app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) + app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig)) // files app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) @@ -290,30 +276,18 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi // Experimental Backend Statistics Module backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now - app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitor)) - app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitor)) + app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor)) + app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor)) // models app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml)) app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml)) - app.Get("/metrics", localai.LocalAIMetricsEndpoint()) + app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint()) // Define a custom 404 handler - app.Use(func(c *fiber.Ctx) error { - - // Check if the request accepts JSON - if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 { - // The client expects a JSON response - 
c.Status(fiber.StatusNotFound).JSON(fiber.Map{ - "error": "Resource not found", - }) - } else { - // The client expects an HTML response - c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{}) - } - return nil - }) + // Note: keep this at the bottom! + app.Use(notFoundHandler) return app, nil } diff --git a/core/http/render.go b/core/http/render.go new file mode 100644 index 00000000..c5045868 --- /dev/null +++ b/core/http/render.go @@ -0,0 +1,80 @@ +package http + +import ( + "embed" + "fmt" + "html/template" + "net/http" + + "github.com/Masterminds/sprig/v3" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/internal" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" + fiberhtml "github.com/gofiber/template/html/v2" + "github.com/russross/blackfriday" +) + +//go:embed views/* +var viewsfs embed.FS + +func notFoundHandler(c *fiber.Ctx) error { + // Check if the request accepts JSON + if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 { + // The client expects a JSON response + c.Status(fiber.StatusNotFound).JSON(schema.ErrorResponse{ + Error: &schema.APIError{Message: "Resource not found", Code: fiber.StatusNotFound}, + }) + } else { + // The client expects an HTML response + c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{}) + } + return nil +} + +func welcomeRoute( + app *fiber.App, + cl *config.BackendConfigLoader, + ml *model.ModelLoader, + appConfig *config.ApplicationConfig, + auth func(*fiber.Ctx) error, +) { + if appConfig.DisableWelcomePage { + return + } + + models, _ := ml.ListModels() + backendConfigs := cl.GetAllBackendConfigs() + + app.Get("/", auth, func(c *fiber.Ctx) error { + summary := fiber.Map{ + "Title": "LocalAI API - " + internal.PrintableVersion(), + "Version": internal.PrintableVersion(), + "Models": models, + "ModelsConfig": backendConfigs, + "ApplicationConfig": appConfig, + } + + if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 { + // The client expects a JSON response + return c.Status(fiber.StatusOK).JSON(summary) + } else { + // Render index + return c.Render("views/index", summary) + } + }) + +} + +func renderEngine() *fiberhtml.Engine { + engine := fiberhtml.NewFileSystem(http.FS(viewsfs), ".html") + engine.AddFuncMap(sprig.FuncMap()) + engine.AddFunc("MDToHTML", markDowner) + return engine +} + +func markDowner(args ...interface{}) template.HTML { + s := blackfriday.MarkdownCommon([]byte(fmt.Sprintf("%s", args...))) + return template.HTML(s) +} diff --git a/core/http/views/index.html b/core/http/views/index.html index ad14f667..287ee1ce 100644 --- a/core/http/views/index.html +++ b/core/http/views/index.html @@ -1,56 +1,50 @@ - - - - {{.Title}} - - - - - - +{{template "views/partials/head" .}} +
+{{template "views/partials/navbar" .}}
[The remaining HTML of this hunk was stripped during extraction. The recoverable content: the page keeps the "Welcome to your LocalAI instance!" banner, the "The FOSS alternative to OpenAI, Claude, ..." tagline, a Documentation link, and the "Installed models" section ("We have {{len .ModelsConfig}} pre-loaded models available."). Inside {{ range .ModelsConfig }}, the old markup listed each {{.Name}} with its optional {{.Usage}} and {{.Description}}, while the new markup shows each {{.Name}} with a {{.Backend}} badge, falling back to "auto" when no backend is set.]
+{{template "views/partials/footer" .}}
diff --git a/go.mod b/go.mod index 79068904..0ac0d8e5 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,8 @@ go 1.21 require ( github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf + github.com/Masterminds/sprig/v3 v3.2.3 + github.com/charmbracelet/glamour v0.6.0 github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df github.com/fsnotify/fsnotify v1.7.0 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e @@ -11,6 +13,8 @@ require ( github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1 github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 github.com/gofiber/fiber/v2 v2.50.0 + github.com/gofiber/template/html/v2 v2.1.1 + github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47 github.com/google/uuid v1.3.1 github.com/hashicorp/go-multierror v1.1.1 github.com/hpcloud/tail v1.0.0 @@ -21,6 +25,7 @@ require ( github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530 github.com/onsi/ginkgo/v2 v2.13.0 github.com/onsi/gomega v1.28.1 + github.com/ory/dockertest/v3 v3.10.0 github.com/otiai10/openaigo v1.6.0 github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 github.com/prometheus/client_golang v1.17.0 @@ -56,7 +61,6 @@ require ( github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 // indirect github.com/Masterminds/goutils v1.1.1 // indirect github.com/Masterminds/semver/v3 v3.2.0 // indirect - github.com/Masterminds/sprig/v3 v3.2.3 // indirect github.com/Microsoft/go-winio v0.6.0 // indirect github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect github.com/alecthomas/chroma v0.10.0 // indirect @@ -65,7 +69,6 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v4 v4.1.3 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect - github.com/charmbracelet/glamour v0.6.0 // indirect github.com/containerd/continuity v0.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dlclark/regexp2 v1.8.1 // indirect @@ -76,7 +79,6 @@ require ( github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/gofiber/template v1.8.3 // indirect - github.com/gofiber/template/html/v2 v2.1.1 // indirect github.com/gofiber/utils v1.1.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.3 // indirect @@ -100,7 +102,6 @@ require ( github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.0.2 // indirect github.com/opencontainers/runc v1.1.5 // indirect - github.com/ory/dockertest/v3 v3.10.0 // indirect github.com/pierrec/lz4/v4 v4.1.2 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pkoukk/tiktoken-go v0.1.2 // indirect @@ -108,6 +109,7 @@ require ( github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect github.com/prometheus/common v0.44.0 // indirect github.com/prometheus/procfs v0.11.1 // indirect + github.com/russross/blackfriday v1.6.0 // indirect github.com/shopspring/decimal v1.2.0 // indirect github.com/sirupsen/logrus v1.8.1 // indirect github.com/spf13/cast v1.3.1 // indirect diff --git a/go.sum b/go.sum index a2c5b912..bec43989 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,6 @@ github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 h1:w+iIsaOQNcT7OZ575w+acHgRric5iCyQh+xv+KJ4HB8= github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8= github.com/BurntSushi/toml 
v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf h1:UgjXLcE9I+VaVz7uBIlzAnyZIXwiDlIiTWqCh159aUI= -github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf/go.mod h1:UOf2Mb/deUri5agct5OJ4SLWjhI+kZKbsUVUeRb24I0= github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g= @@ -43,6 +41,7 @@ github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSV github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -59,19 +58,14 @@ github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKoh github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df h1:qVcBEZlvp5A1gGWNJj02xyDtbsUI2hohlQMSB1fgER4= -github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY= github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s= github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= -github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= -github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e h1:KtbU2JR3lJuXFASHG2+sVLucfMPBjWKUUKByX6C81mQ= -github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs= github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA= @@ -85,10 +79,10 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-ole/go-ole v1.2.6 
h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= -github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1 h1:yXvc7QfGtoZ51tUW/YVjoTwAfh8HG88XU7UOrbNlz5Y= -github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1/go.mod h1:fYjkCDRzC+oRLHSjQoajmYK6AmeJnmEanV27CClAcDc= github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 h1:WYjkXL0Nw7dN2uDBMVCWQ8xLavrIhjF/DLczuh5L9TY= github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo= +github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI= +github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= @@ -117,6 +111,8 @@ github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/snappy v0.0.2 h1:aeE13tS0IiQgFjYdoL8qN3K1N2bXXtI6Vi51/y7BpMw= github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47 h1:k4Tw0nt6lwro3Uin8eqoET7MDA4JnT8YgbCjc/g5E3k= +github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= @@ -164,6 +160,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/lib/pq v0.0.0-20180327071824-d34b9ff171c2 h1:hRGSmZu7j271trc9sneMrpOW7GN5ngLm8YUZIPzf394= +github.com/lib/pq v0.0.0-20180327071824-d34b9ff171c2/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= @@ -184,8 +182,6 @@ github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfr github.com/mholt/archiver/v3 v3.5.1 h1:rDjOBX9JSF5BvoJGvjqK479aL70qh9DIpZCl+k7Clwo= github.com/mholt/archiver/v3 v3.5.1/go.mod h1:e3dqJ7H78uzsRSEACH1joayhuSyhnonssnDhppzS1L4= github.com/microcosm-cc/bluemonday v1.0.21/go.mod h1:ytNkv4RrDrLJ2pqlsSI46O6IVXmZOBBD4SaJyDwwTkM= -github.com/microcosm-cc/bluemonday v1.0.24 h1:NGQoPtwGVcbGkKfvyYk1yRqknzBuoMiUrO6R7uFTPlw= -github.com/microcosm-cc/bluemonday v1.0.24/go.mod h1:ArQySAMps0790cHSkdPEJ7bGkF2VePWH773hsJNSHf8= github.com/microcosm-cc/bluemonday v1.0.26 h1:xbqSvqzQMeEHCqMi64VAs4d8uy6Mequs3rQ0k/Khz58= github.com/microcosm-cc/bluemonday 
v1.0.26/go.mod h1:JyzOCs9gkyQyjs+6h10UEVSe02CGwkhd72Xdqh78TWs= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= @@ -200,18 +196,12 @@ github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdx github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 h1:rzf0wL0CHVc8CEsgyygG0Mn9CNCCPZqOPaz8RiiHYQk= github.com/moby/term v0.0.0-20201216013528-df9cb8a40635/go.mod h1:FBS0z0QWA44HXygs7VXDUOGoN/1TV3RuWkLO04am3wc= github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= -github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 h1:OFVkSxR7CRSRSNm5dvpMRZwmSwWa8EMMnHbc84fW5tU= -github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig= github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGwqBpN8N7BrSKC+nmdfw+9nPQIDyjHHlaIiitZI= github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c/go.mod h1:gY3wyrhkRySJtmtI/JPt4a2mKv48h/M9pEZIW+SjeC0= -github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af h1:XFq6OUqsWQam0OrEr05okXsJK/TQur3zoZTHbiZD3Ks= -github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw= github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s= github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8= github.com/muesli/termenv v0.13.0 h1:wK20DRpJdDX8b7Ek2QfhvqhRQFZ237RGRO0RQ/Iqdy0= github.com/muesli/termenv v0.13.0/go.mod h1:sP1+uffeLaEYpyOTb8pLCUctGcGLnoFjSn4YJK5e2bc= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530 h1:YXMxHwHMB9jCBo2Yu5gz3mTB3T1TnZs/HmPLv15LUSA= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ= github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= @@ -273,11 +263,11 @@ github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUz github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A= github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= +github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww= +github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/sashabaranov/go-openai v1.16.0 h1:34W6WV84ey6OpW0p2UewZkdMu82AxGC+BzpU6iiauRw= -github.com/sashabaranov/go-openai v1.16.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/sashabaranov/go-openai v1.20.4 h1:095xQ/fAtRa0+Rj21sezVJABgKfGPNbyx/sAN/hJUmg= github.com/sashabaranov/go-openai v1.20.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/schollz/progressbar/v3 v3.13.1 h1:o8rySDYiQ59Mwzy2FELeHY5ZARXZTVJC7iHD6PEFUiE= @@ -397,6 +387,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod 
h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= +golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -430,8 +422,6 @@ golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= -golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= @@ -492,3 +482,5 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk= +gotest.tools/v3 v3.3.0 h1:MfDY1b1/0xN1CyMlQDac0ziEy9zJQd9CXBRRDHw2jJo= +gotest.tools/v3 v3.3.0/go.mod h1:Mcr9QNxkg0uMvy/YElmo4SpXgJKWgQvYrT7Kw5RzJ1A= From b9c5e14e2c9a9ac9a9f3292db8914dbdbbe8f06e Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 29 Mar 2024 00:13:38 +0100 Subject: [PATCH 0209/2895] :arrow_up: Update ggerganov/llama.cpp (#1923) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3258bbe3..4323e4eb 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=a016026a3ac16d8c9b993a3573f19b9556d67de4 +CPPLLAMA_VERSION?=5106ef482c65ac60ac14da9a68c7b37bca4c6993 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From ab2f403dd0716e1c167389a3e69486891c5444b8 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 29 Mar 2024 00:13:59 +0100 Subject: [PATCH 0210/2895] :arrow_up: Update ggerganov/whisper.cpp (#1924) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4323e4eb..dcaa1227 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=2948c740a2bf43190b8e3badb6f1e147f11f96d1 
+WHISPER_CPP_VERSION?=fc366b807a17dc05813a6fcc13c8c4dfd442fa6a # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From 123a5a2e160ad0053f26b8a75d8b3f3bbd0c2c2d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 29 Mar 2024 22:29:33 +0100 Subject: [PATCH 0211/2895] feat(swagger): Add swagger API doc (#1926) * makefile(build): add minimal and api build target * feat(swagger): Add swagger --- Makefile | 10 + core/http/api.go | 17 + core/http/endpoints/elevenlabs/tts.go | 6 + core/http/endpoints/localai/tts.go | 5 + core/http/endpoints/openai/assistant.go | 16 +- core/http/endpoints/openai/chat.go | 5 + core/http/endpoints/openai/completion.go | 6 +- core/http/endpoints/openai/embeddings.go | 6 +- core/http/endpoints/openai/image.go | 7 +- core/http/endpoints/openai/transcription.go | 8 +- core/http/views/partials/navbar.html | 1 + go.mod | 45 +- go.sum | 59 ++ main.go | 2 + swagger/docs.go | 801 ++++++++++++++++++++ swagger/swagger.json | 776 +++++++++++++++++++ swagger/swagger.yaml | 519 +++++++++++++ 17 files changed, 2264 insertions(+), 25 deletions(-) create mode 100644 swagger/docs.go create mode 100644 swagger/swagger.json create mode 100644 swagger/swagger.yaml diff --git a/Makefile b/Makefile index dcaa1227..440f5158 100644 --- a/Makefile +++ b/Makefile @@ -307,6 +307,12 @@ build: prepare backend-assets grpcs ## Build the project $(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET}) CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./ +build-minimal: + BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS=backend-assets/grpc/llama-cpp GO_TAGS=none $(MAKE) build + +build-api: + BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build + dist: build mkdir -p release cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) @@ -584,3 +590,7 @@ docker-image-intel-xpu: --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="none" \ --build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) . + +.PHONY: swagger +swagger: + swag init -g core/http/api.go --output swagger diff --git a/core/http/api.go b/core/http/api.go index 24216737..ff413b0a 100644 --- a/core/http/api.go +++ b/core/http/api.go @@ -7,6 +7,7 @@ import ( "strings" "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/gofiber/swagger" // swagger handler "github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs" "github.com/go-skynet/LocalAI/core/http/endpoints/localai" @@ -42,6 +43,20 @@ func readAuthHeader(c *fiber.Ctx) string { return authHeader } +// @title LocalAI API +// @version 2.0.0 +// @description The LocalAI Rest API. 
+// @termsOfService
+// @contact.name LocalAI
+// @contact.url https://localai.io
+// @license.name MIT
+// @license.url https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE
+// @host localhost:8080
+// @BasePath /
+// @securityDefinitions.apikey BearerAuth
+// @in header
+// @name Authorization
+
 func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
 	// Return errors as JSON responses
 	app := fiber.New(fiber.Config{
@@ -170,6 +185,8 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
 	utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
 	utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
 
+	app.Get("/swagger/*", swagger.HandlerDefault) // default
+
 	welcomeRoute(
 		app,
 		cl,
diff --git a/core/http/endpoints/elevenlabs/tts.go b/core/http/endpoints/elevenlabs/tts.go
index b70c8de4..841f9b5f 100644
--- a/core/http/endpoints/elevenlabs/tts.go
+++ b/core/http/endpoints/elevenlabs/tts.go
@@ -11,6 +11,12 @@ import (
 	"github.com/rs/zerolog/log"
 )
 
+// TTSEndpoint is the ElevenLabs-compatible text-to-speech API endpoint
+// @Summary Generates audio from the input text.
+// @Param voice-id path string true "Voice ID"
+// @Param request body schema.TTSRequest true "query params"
+// @Success 200 {string} binary "Response"
+// @Router /v1/text-to-speech/{voice-id} [post]
 func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go
index 508a29ab..7822e024 100644
--- a/core/http/endpoints/localai/tts.go
+++ b/core/http/endpoints/localai/tts.go
@@ -11,6 +11,11 @@ import (
 	"github.com/rs/zerolog/log"
 )
 
+// TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech
+// @Summary Generates audio from the input text.
+// @Param request body schema.TTSRequest true "query params"
+// @Success 200 {string} binary "Response"
+// @Router /v1/audio/speech [post]
 func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
diff --git a/core/http/endpoints/openai/assistant.go b/core/http/endpoints/openai/assistant.go
index 0e0d8a99..dceb3789 100644
--- a/core/http/endpoints/openai/assistant.go
+++ b/core/http/endpoints/openai/assistant.go
@@ -2,17 +2,18 @@ package openai
 
 import (
 	"fmt"
-	"github.com/go-skynet/LocalAI/core/config"
-	model "github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/utils"
-	"github.com/gofiber/fiber/v2"
-	"github.com/rs/zerolog/log"
 	"net/http"
 	"sort"
 	"strconv"
 	"strings"
 	"sync/atomic"
 	"time"
+
+	"github.com/go-skynet/LocalAI/core/config"
+	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/gofiber/fiber/v2"
+	"github.com/rs/zerolog/log"
 )
 
 // ToolType defines a type for tool options
@@ -65,6 +66,11 @@ type AssistantRequest struct {
 	Metadata map[string]string `json:"metadata,omitempty"`
 }
 
+// CreateAssistantEndpoint is the OpenAI Assistant API endpoint https://platform.openai.com/docs/api-reference/assistants/createAssistant
+// @Summary Create an assistant with a model and instructions.
+// @Param request body AssistantRequest true "query params"
+// @Success 200 {object} Assistant "Response"
+// @Router /v1/assistants [post]
 func CreateAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		request := new(AssistantRequest)
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index 383a2b77..c2e22962 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -20,6 +20,11 @@ import (
 	"github.com/valyala/fasthttp"
 )
 
+// ChatEndpoint is the OpenAI Chat API endpoint https://platform.openai.com/docs/api-reference/chat/create
+// @Summary Generate chat completions for a given prompt and model.
+// @Param request body schema.OpenAIRequest true "query params"
+// @Success 200 {object} schema.OpenAIResponse "Response"
+// @Router /v1/chat/completions [post]
 func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	emptyMessage := ""
 	id := uuid.New().String()
diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go
index 9344f9fe..a67f0993 100644
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@@ -20,7 +20,11 @@ import (
 	"github.com/valyala/fasthttp"
 )
 
-// https://platform.openai.com/docs/api-reference/completions
+// CompletionEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/completions
+// @Summary Generate completions for a given prompt and model.
+// @Param request body schema.OpenAIRequest true "query params"
+// @Success 200 {object} schema.OpenAIResponse "Response"
+// @Router /v1/completions [post]
 func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	id := uuid.New().String()
 	created := int(time.Now().Unix())
diff --git a/core/http/endpoints/openai/embeddings.go b/core/http/endpoints/openai/embeddings.go
index 774b0a5e..eca34f79 100644
--- a/core/http/endpoints/openai/embeddings.go
+++ b/core/http/endpoints/openai/embeddings.go
@@ -16,7 +16,11 @@ import (
 	"github.com/rs/zerolog/log"
 )
 
-// https://platform.openai.com/docs/api-reference/embeddings
+// EmbeddingsEndpoint is the OpenAI Embeddings API endpoint https://platform.openai.com/docs/api-reference/embeddings
+// @Summary Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.
+// @Param request body schema.OpenAIRequest true "query params" +// @Success 200 {object} schema.OpenAIResponse "Response" +// @Router /v1/embeddings [post] func EmbeddingsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { model, input, err := readRequest(c, ml, appConfig, true) diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go index d59b1051..9e806b3e 100644 --- a/core/http/endpoints/openai/image.go +++ b/core/http/endpoints/openai/image.go @@ -44,7 +44,7 @@ func downloadFile(url string) (string, error) { return out.Name(), err } -// https://platform.openai.com/docs/api-reference/images/create +// /* * @@ -59,6 +59,11 @@ func downloadFile(url string) (string, error) { * */ +// ImageEndpoint is the OpenAI Image generation API endpoint https://platform.openai.com/docs/api-reference/images/create +// @Summary Creates an image given a prompt. +// @Param request body schema.OpenAIRequest true "query params" +// @Success 200 {object} schema.OpenAIResponse "Response" +// @Router /v1/images/generations [post] func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { m, input, err := readRequest(c, ml, appConfig, false) diff --git a/core/http/endpoints/openai/transcription.go b/core/http/endpoints/openai/transcription.go index 403f8b02..c7dd39e7 100644 --- a/core/http/endpoints/openai/transcription.go +++ b/core/http/endpoints/openai/transcription.go @@ -16,7 +16,13 @@ import ( "github.com/rs/zerolog/log" ) -// https://platform.openai.com/docs/api-reference/audio/create +// TranscriptEndpoint is the OpenAI Whisper API endpoint https://platform.openai.com/docs/api-reference/audio/create +// @Summary Transcribes audio into the input language. 
+// @accept multipart/form-data +// @Param model formData string true "model" +// @Param file formData file true "file" +// @Success 200 {object} map[string]string "Response" +// @Router /v1/audio/transcriptions [post] func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { m, input, err := readRequest(c, ml, appConfig, false) diff --git a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html index 2717f974..c3d3223f 100644 --- a/core/http/views/partials/navbar.html +++ b/core/http/views/partials/navbar.html @@ -9,6 +9,7 @@ diff --git a/go.mod b/go.mod index 0ac0d8e5..4dd207c7 100644 --- a/go.mod +++ b/go.mod @@ -12,10 +12,10 @@ require ( github.com/go-audio/wav v1.1.0 github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1 github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 - github.com/gofiber/fiber/v2 v2.50.0 + github.com/gofiber/fiber/v2 v2.52.0 github.com/gofiber/template/html/v2 v2.1.1 github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47 - github.com/google/uuid v1.3.1 + github.com/google/uuid v1.5.0 github.com/hashicorp/go-multierror v1.1.1 github.com/hpcloud/tail v1.0.0 github.com/imdario/mergo v0.3.16 @@ -32,10 +32,10 @@ require ( github.com/rs/zerolog v1.31.0 github.com/sashabaranov/go-openai v1.20.4 github.com/schollz/progressbar/v3 v3.13.1 - github.com/stretchr/testify v1.8.4 + github.com/stretchr/testify v1.9.0 github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701 - github.com/urfave/cli/v2 v2.25.7 - github.com/valyala/fasthttp v1.50.0 + github.com/urfave/cli/v2 v2.27.1 + github.com/valyala/fasthttp v1.51.0 go.opentelemetry.io/otel v1.19.0 go.opentelemetry.io/otel/exporters/prometheus v0.42.0 go.opentelemetry.io/otel/metric v1.19.0 @@ -59,10 +59,13 @@ require ( require ( github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 // indirect + github.com/KyleBanks/depth v1.2.1 // indirect github.com/Masterminds/goutils v1.1.1 // indirect github.com/Masterminds/semver/v3 v3.2.0 // indirect github.com/Microsoft/go-winio v0.6.0 // indirect github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect + github.com/PuerkitoBio/purell v1.2.1 // indirect + github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect github.com/alecthomas/chroma v0.10.0 // indirect github.com/aymanbagabas/go-osc52 v1.0.3 // indirect github.com/aymerick/douceur v0.2.0 // indirect @@ -78,6 +81,11 @@ require ( github.com/docker/go-units v0.4.0 // indirect github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonreference v0.21.0 // indirect + github.com/go-openapi/spec v0.21.0 // indirect + github.com/go-openapi/swag v0.23.0 // indirect + github.com/gofiber/swagger v1.0.0 // indirect github.com/gofiber/template v1.8.3 // indirect github.com/gofiber/utils v1.1.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect @@ -86,8 +94,10 @@ require ( github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/gorilla/css v1.0.0 // indirect github.com/huandu/xstrings v1.3.3 // indirect + github.com/josharian/intern v1.0.0 // indirect github.com/klauspost/pgzip v1.2.5 // indirect github.com/lucasb-eyer/go-colorful v1.2.0 // indirect + github.com/mailru/easyjson v0.7.7 // indirect github.com/matttproud/golang_protobuf_extensions 
v1.0.4 // indirect github.com/microcosm-cc/bluemonday v1.0.26 // indirect github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect @@ -113,6 +123,8 @@ require ( github.com/shopspring/decimal v1.2.0 // indirect github.com/sirupsen/logrus v1.8.1 // indirect github.com/spf13/cast v1.3.1 // indirect + github.com/swaggo/files/v2 v2.0.0 // indirect + github.com/swaggo/swag v1.16.3 // indirect github.com/ulikunitz/xz v0.5.9 // indirect github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect @@ -122,17 +134,18 @@ require ( github.com/yuin/goldmark-emoji v1.0.1 // indirect go.opentelemetry.io/otel/sdk v1.19.0 // indirect go.opentelemetry.io/otel/trace v1.19.0 // indirect - golang.org/x/crypto v0.14.0 // indirect - golang.org/x/mod v0.12.0 // indirect - golang.org/x/term v0.13.0 // indirect + golang.org/x/crypto v0.21.0 // indirect + golang.org/x/mod v0.16.0 // indirect + golang.org/x/term v0.18.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect gopkg.in/fsnotify.v1 v1.4.7 // indirect gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect + sigs.k8s.io/yaml v1.4.0 // indirect ) require ( github.com/andybalholm/brotli v1.0.5 // indirect - github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect + github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect github.com/go-audio/audio v1.0.0 // indirect github.com/go-audio/riff v1.0.0 // indirect github.com/go-logr/logr v1.2.4 // indirect @@ -140,18 +153,18 @@ require ( github.com/google/go-cmp v0.6.0 // indirect github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect github.com/hashicorp/errwrap v1.0.0 // indirect - github.com/klauspost/compress v1.16.7 // indirect + github.com/klauspost/compress v1.17.0 // indirect github.com/mattn/go-colorable v0.1.13 // indirect - github.com/mattn/go-isatty v0.0.19 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-runewidth v0.0.15 // indirect github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 github.com/rivo/uniseg v0.2.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/tcplisten v1.0.0 // indirect - github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect - golang.org/x/net v0.17.0 // indirect - golang.org/x/sys v0.17.0 // indirect - golang.org/x/text v0.13.0 // indirect - golang.org/x/tools v0.12.0 // indirect + github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 // indirect + golang.org/x/net v0.22.0 // indirect + golang.org/x/sys v0.18.0 // indirect + golang.org/x/text v0.14.0 // indirect + golang.org/x/tools v0.19.0 // indirect ) diff --git a/go.sum b/go.sum index bec43989..f81f10c8 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,8 @@ github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 h1:w+iIsaOQNcT7OZ575w+acHgRric5iCyQh+xv+KJ4HB8= github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= +github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= github.com/Masterminds/goutils v1.1.1/go.mod 
h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g= @@ -11,6 +13,10 @@ github.com/Microsoft/go-winio v0.6.0 h1:slsWYD/zyx7lCXoZVlvQrj0hPTM1HI4+v1sIda2y github.com/Microsoft/go-winio v0.6.0/go.mod h1:cTAf44im0RAYeL23bpB+fzCyDH2MJiz2BO69KH/soAE= github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEVMRuU21PR1EtLVZJmdB18Gu3Rw= github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk= +github.com/PuerkitoBio/purell v1.2.1 h1:QsZ4TjvwiMpat6gBCBxEQI0rcS9ehtkKtSpiUnd9N28= +github.com/PuerkitoBio/purell v1.2.1/go.mod h1:ZwHcC/82TOaovDi//J/804umJFFmbOHPngi8iYYv/Eo= +github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M= +github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek= github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s= github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= @@ -41,6 +47,8 @@ github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSV github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4= +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= @@ -79,6 +87,14 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= +github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= +github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= +github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= +github.com/go-openapi/spec v0.21.0 h1:LTVzPc3p/RzRnkQqLRndbAzjY0d0BCL72A6j3CdL9ZY= +github.com/go-openapi/spec v0.21.0/go.mod h1:78u6VdPw81XU44qEWGhtr982gJ5BWg2c0I5XwVMotYk= +github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= +github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 h1:WYjkXL0Nw7dN2uDBMVCWQ8xLavrIhjF/DLczuh5L9TY= github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo= github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI= @@ -90,6 +106,10 @@ github.com/godbus/dbus/v5 v5.0.4/go.mod 
h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5x github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gofiber/fiber/v2 v2.50.0 h1:ia0JaB+uw3GpNSCR5nvC5dsaxXjRU5OEu36aytx+zGw= github.com/gofiber/fiber/v2 v2.50.0/go.mod h1:21eytvay9Is7S6z+OgPi7c7n4++tnClWmhpimVHMimw= +github.com/gofiber/fiber/v2 v2.52.0 h1:S+qXi7y+/Pgvqq4DrSmREGiFwtB7Bu6+QFLuIHYw/UE= +github.com/gofiber/fiber/v2 v2.52.0/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ= +github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc= +github.com/gofiber/swagger v1.0.0/go.mod h1:QrYNF1Yrc7ggGK6ATsJ6yfH/8Zi5bu9lA7wB8TmCecg= github.com/gofiber/template v1.8.3 h1:hzHdvMwMo/T2kouz2pPCA0zGiLCeMnoGsQZBTSYgZxc= github.com/gofiber/template v1.8.3/go.mod h1:bs/2n0pSNPOkRa5VJ8zTIvedcI/lEYxzV3+YPXdBvq8= github.com/gofiber/template/html/v2 v2.1.1 h1:QEy3O3EBkvwDthy5bXVGUseOyO6ldJoiDxlF4+MJiV8= @@ -129,6 +149,8 @@ github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3 github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= +github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY= github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c= github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= @@ -143,6 +165,8 @@ github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1: github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= @@ -150,6 +174,8 @@ github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0 github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/klauspost/compress v1.17.0 h1:Rnbp4K9EjcDuVuHtd0dgA4qNuv9yKDYKK1ulpJwgrqM= +github.com/klauspost/compress v1.17.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= @@ -166,12 +192,16 @@ github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69 github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= github.com/lufia/plan9stats 
v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= @@ -300,6 +330,12 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/swaggo/files/v2 v2.0.0 h1:hmAt8Dkynw7Ssz46F6pn8ok6YmGZqHSVLZ+HQM7i0kw= +github.com/swaggo/files/v2 v2.0.0/go.mod h1:24kk2Y9NYEJ5lHuCra6iVwkMjIekMCaFq/0JQj66kyM= +github.com/swaggo/swag v1.16.3 h1:PnCYjPCah8FK4I26l2F/KQ4yz3sILcVUN3cTlBFA9Pg= +github.com/swaggo/swag v1.16.3/go.mod h1:DImHIuOFXKpMFAQjcC7FG4m3Dg4+QuUgUzJmKjI/gRk= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI= github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU= @@ -315,10 +351,14 @@ github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oW github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/urfave/cli/v2 v2.25.7 h1:VAzn5oq403l5pHjc4OhD54+XGO9cdKVL/7lDjF+iKUs= github.com/urfave/cli/v2 v2.25.7/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= +github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho= +github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.50.0 h1:H7fweIlBm0rXLs2q0XbalvJ6r0CUPFWK3/bB4N13e9M= github.com/valyala/fasthttp v1.50.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA= +github.com/valyala/fasthttp v1.51.0 h1:8b30A5JlZ6C7AS81RsWjYMQmrZG6feChmgAolCl1SqA= +github.com/valyala/fasthttp 
v1.51.0/go.mod h1:oI2XroL+lI7vdXyYoQk03bXBThfFl2cVdIA3Xl7cH8g= github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8= github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= @@ -333,6 +373,8 @@ github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofm github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos= github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= +github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 h1:+qGGcbkzsfDQNPPe9UDgpxAWQrhbbBXOYJFQDq/dtJw= +github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913/go.mod h1:4aEEwZQutDLsQv2Deui4iYQ6DWTxR14g6m8Wv88+Xqk= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= @@ -361,11 +403,15 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc= golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= +golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= +golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.16.0 h1:QX4fJ0Rr5cPQCF7O9lh9Se4pmwfwskqZfq5moyldzic= +golang.org/x/mod v0.16.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= @@ -381,6 +427,8 @@ golang.org/x/net v0.0.0-20221002022538-bcab6841153b/go.mod h1:YDH+HFinaLZZlnHAfS golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= +golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc= +golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -389,6 +437,7 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod 
h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -424,12 +473,16 @@ golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= +golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= +golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= @@ -437,6 +490,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190624222133-a101b041ded4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= @@ -446,6 +501,8 @@ golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.12.0 h1:YW6HUoUmYBpwSgyaGaZq1fHjrBjX1rlpZ54T6mu2kss= golang.org/x/tools v0.12.0/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM= +golang.org/x/tools v0.19.0 h1:tfGCXNR1OsFG+sVdLAitlpjAvD/I6dHDKnYrpEZUHkw= +golang.org/x/tools v0.19.0/go.mod h1:qoJWxmGSIBmAeriMx19ogtrEPrGtDbPK634QFIcLAhc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors 
v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -484,3 +541,5 @@ gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk= gotest.tools/v3 v3.3.0 h1:MfDY1b1/0xN1CyMlQDac0ziEy9zJQd9CXBRRDHw2jJo= gotest.tools/v3 v3.3.0/go.mod h1:Mcr9QNxkg0uMvy/YElmo4SpXgJKWgQvYrT7Kw5RzJ1A= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/main.go b/main.go index f000aa71..0d8befcb 100644 --- a/main.go +++ b/main.go @@ -25,6 +25,8 @@ import ( "github.com/rs/zerolog/log" progressbar "github.com/schollz/progressbar/v3" "github.com/urfave/cli/v2" + + _ "github.com/go-skynet/LocalAI/swagger" ) const ( diff --git a/swagger/docs.go b/swagger/docs.go new file mode 100644 index 00000000..a922fa2e --- /dev/null +++ b/swagger/docs.go @@ -0,0 +1,801 @@ +// Code generated by swaggo/swag. DO NOT EDIT. + +package swagger + +import "github.com/swaggo/swag" + +const docTemplate = `{ + "schemes": {{ marshal .Schemes }}, + "swagger": "2.0", + "info": { + "description": "{{escape .Description}}", + "title": "{{.Title}}", + "contact": { + "name": "OpenAI Support", + "url": "https://help.openai.com/" + }, + "license": { + "name": "MIT", + "url": "https://github.com/openai/openai-openapi/blob/master/LICENSE" + }, + "version": "{{.Version}}" + }, + "host": "{{.Host}}", + "basePath": "{{.BasePath}}", + "paths": { + "/v1/assistants": { + "post": { + "summary": "Create an assistant with a model and instructions.", + "parameters": [ + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/openai.AssistantRequest" + } + } + ], + "responses": { + "200": { + "description": "Response", + "schema": { + "$ref": "#/definitions/openai.Assistant" + } + } + } + } + }, + "/v1/audio/speech": { + "post": { + "summary": "Generates audio from the input text.", + "parameters": [ + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.TTSRequest" + } + } + ], + "responses": { + "200": { + "description": "Response", + "schema": { + "type": "string" + } + } + } + } + }, + "/v1/audio/transcriptions": { + "post": { + "consumes": [ + "multipart/form-data" + ], + "summary": "Transcribes audio into the input language.", + "parameters": [ + { + "type": "string", + "description": "model", + "name": "model", + "in": "formData", + "required": true + }, + { + "type": "file", + "description": "file", + "name": "file", + "in": "formData", + "required": true + } + ], + "responses": { + "200": { + "description": "Response", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/v1/chat/completions": { + "post": { + "summary": "Generate a chat completions for a given prompt and model.", + "parameters": [ + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.OpenAIRequest" + } + } + ], + "responses": { + "200": { + "description": "Response", + "schema": { + "$ref": "#/definitions/schema.OpenAIResponse" + } + } + } + } + }, + "/v1/completions": { + "post": { + "summary": "Generate completions for a given prompt 
and model.", + "parameters": [ + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.OpenAIRequest" + } + } + ], + "responses": { + "200": { + "description": "Response", + "schema": { + "$ref": "#/definitions/schema.OpenAIResponse" + } + } + } + } + }, + "/v1/embeddings": { + "post": { + "summary": "Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.", + "parameters": [ + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.OpenAIRequest" + } + } + ], + "responses": { + "200": { + "description": "Response", + "schema": { + "$ref": "#/definitions/schema.OpenAIResponse" + } + } + } + } + }, + "/v1/images/generations": { + "post": { + "summary": "Creates an image given a prompt.", + "parameters": [ + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.OpenAIRequest" + } + } + ], + "responses": { + "200": { + "description": "Response", + "schema": { + "$ref": "#/definitions/schema.OpenAIResponse" + } + } + } + } + }, + "/v1/text-to-speech/{voice-id}": { + "post": { + "summary": "Generates audio from the input text.", + "parameters": [ + { + "type": "string", + "description": "Account ID", + "name": "voice-id", + "in": "path", + "required": true + }, + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.TTSRequest" + } + } + ], + "responses": { + "200": { + "description": "Response", + "schema": { + "type": "string" + } + } + } + } + } + }, + "definitions": { + "grammar.Argument": { + "type": "object", + "properties": { + "properties": { + "type": "object", + "additionalProperties": true + }, + "type": { + "type": "string" + } + } + }, + "grammar.Function": { + "type": "object", + "properties": { + "description": { + "type": "string" + }, + "name": { + "type": "string" + }, + "parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "grammar.FunctionName": { + "type": "object", + "properties": { + "const": { + "type": "string" + } + } + }, + "grammar.Item": { + "type": "object", + "properties": { + "properties": { + "$ref": "#/definitions/grammar.Properties" + }, + "type": { + "type": "string" + } + } + }, + "grammar.JSONFunctionStructure": { + "type": "object", + "properties": { + "$defs": { + "type": "object", + "additionalProperties": true + }, + "anyOf": { + "type": "array", + "items": { + "$ref": "#/definitions/grammar.Item" + } + }, + "oneOf": { + "type": "array", + "items": { + "$ref": "#/definitions/grammar.Item" + } + } + } + }, + "grammar.Properties": { + "type": "object", + "properties": { + "arguments": { + "$ref": "#/definitions/grammar.Argument" + }, + "function": { + "$ref": "#/definitions/grammar.FunctionName" + } + } + }, + "grammar.Tool": { + "type": "object", + "properties": { + "function": { + "$ref": "#/definitions/grammar.Function" + }, + "type": { + "type": "string" + } + } + }, + "openai.Assistant": { + "type": "object", + "properties": { + "created": { + "description": "The time at which the assistant was created.", + "type": "integer" + }, + "description": { + "description": "The description of the assistant.", + "type": "string" + }, + "file_ids": { + "description": "A list of file IDs attached to this assistant.", + "type": "array", + "items": 
{ + "type": "string" + } + }, + "id": { + "description": "The unique identifier of the assistant.", + "type": "string" + }, + "instructions": { + "description": "The system instructions that the assistant uses.", + "type": "string" + }, + "metadata": { + "description": "Set of key-value pairs attached to the assistant.", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "model": { + "description": "The model ID used by the assistant.", + "type": "string" + }, + "name": { + "description": "The name of the assistant.", + "type": "string" + }, + "object": { + "description": "Object type, which is \"assistant\".", + "type": "string" + }, + "tools": { + "description": "A list of tools enabled on the assistant.", + "type": "array", + "items": { + "$ref": "#/definitions/openai.Tool" + } + } + } + }, + "openai.AssistantRequest": { + "type": "object", + "properties": { + "description": { + "type": "string" + }, + "file_ids": { + "type": "array", + "items": { + "type": "string" + } + }, + "instructions": { + "type": "string" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "model": { + "type": "string" + }, + "name": { + "type": "string" + }, + "tools": { + "type": "array", + "items": { + "$ref": "#/definitions/openai.Tool" + } + } + } + }, + "openai.Tool": { + "type": "object", + "properties": { + "type": { + "$ref": "#/definitions/openai.ToolType" + } + } + }, + "openai.ToolType": { + "type": "string", + "enum": [ + "code_interpreter", + "retrieval", + "function" + ], + "x-enum-varnames": [ + "CodeInterpreter", + "Retrieval", + "Function" + ] + }, + "schema.ChatCompletionResponseFormat": { + "type": "object", + "properties": { + "type": { + "type": "string" + } + } + }, + "schema.Choice": { + "type": "object", + "properties": { + "delta": { + "$ref": "#/definitions/schema.Message" + }, + "finish_reason": { + "type": "string" + }, + "index": { + "type": "integer" + }, + "message": { + "$ref": "#/definitions/schema.Message" + }, + "text": { + "type": "string" + } + } + }, + "schema.FunctionCall": { + "type": "object", + "properties": { + "arguments": { + "type": "string" + }, + "name": { + "type": "string" + } + } + }, + "schema.Item": { + "type": "object", + "properties": { + "b64_json": { + "type": "string" + }, + "embedding": { + "type": "array", + "items": { + "type": "number" + } + }, + "index": { + "type": "integer" + }, + "object": { + "type": "string" + }, + "url": { + "description": "Images", + "type": "string" + } + } + }, + "schema.Message": { + "type": "object", + "properties": { + "content": { + "description": "The message content" + }, + "function_call": { + "description": "A result of a function call" + }, + "name": { + "description": "The message name (used for tools calls)", + "type": "string" + }, + "role": { + "description": "The message role", + "type": "string" + }, + "string_content": { + "type": "string" + }, + "string_images": { + "type": "array", + "items": { + "type": "string" + } + }, + "tool_calls": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.ToolCall" + } + } + } + }, + "schema.OpenAIRequest": { + "type": "object", + "required": [ + "file" + ], + "properties": { + "backend": { + "type": "string" + }, + "batch": { + "description": "Custom parameters - not present in the OpenAI API", + "type": "integer" + }, + "clip_skip": { + "description": "Diffusers", + "type": "integer" + }, + "echo": { + "type": "boolean" + }, + "file": { + "description": "whisper", + "type": "string" + 
}, + "frequency_penalty": { + "type": "number" + }, + "function_call": { + "description": "might be a string or an object" + }, + "functions": { + "description": "A list of available functions to call", + "type": "array", + "items": { + "$ref": "#/definitions/grammar.Function" + } + }, + "grammar": { + "description": "A grammar to constrain the LLM output", + "type": "string" + }, + "grammar_json_functions": { + "$ref": "#/definitions/grammar.JSONFunctionStructure" + }, + "ignore_eos": { + "type": "boolean" + }, + "input": {}, + "instruction": { + "description": "Edit endpoint", + "type": "string" + }, + "language": { + "description": "Also part of the OpenAI official spec", + "type": "string" + }, + "max_tokens": { + "type": "integer" + }, + "messages": { + "description": "Messages is read only by chat/completion API calls", + "type": "array", + "items": { + "$ref": "#/definitions/schema.Message" + } + }, + "mode": { + "description": "Image (not supported by OpenAI)", + "type": "integer" + }, + "model": { + "description": "Also part of the OpenAI official spec", + "type": "string" + }, + "model_base_name": { + "description": "AutoGPTQ", + "type": "string" + }, + "n": { + "description": "Also part of the OpenAI official spec. use it for returning multiple results", + "type": "integer" + }, + "n_keep": { + "type": "integer" + }, + "negative_prompt": { + "type": "string" + }, + "negative_prompt_scale": { + "type": "number" + }, + "presence_penalty": { + "type": "number" + }, + "prompt": { + "description": "Prompt is read only by completion/image API calls" + }, + "repeat_penalty": { + "type": "number" + }, + "response_format": { + "description": "whisper/image", + "allOf": [ + { + "$ref": "#/definitions/schema.ChatCompletionResponseFormat" + } + ] + }, + "rope_freq_base": { + "type": "number" + }, + "rope_freq_scale": { + "type": "number" + }, + "seed": { + "type": "integer" + }, + "size": { + "description": "image", + "type": "string" + }, + "step": { + "type": "integer" + }, + "stop": {}, + "stream": { + "type": "boolean" + }, + "temperature": { + "type": "number" + }, + "tfz": { + "type": "number" + }, + "tokenizer": { + "description": "RWKV (?)", + "type": "string" + }, + "tool_choice": {}, + "tools": { + "type": "array", + "items": { + "$ref": "#/definitions/grammar.Tool" + } + }, + "top_k": { + "type": "integer" + }, + "top_p": { + "description": "Common options between all the API calls, part of the OpenAI spec", + "type": "number" + }, + "typical_p": { + "type": "number" + }, + "use_fast_tokenizer": { + "description": "AutoGPTQ", + "type": "boolean" + } + } + }, + "schema.OpenAIResponse": { + "type": "object", + "properties": { + "choices": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.Choice" + } + }, + "created": { + "type": "integer" + }, + "data": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.Item" + } + }, + "id": { + "type": "string" + }, + "model": { + "type": "string" + }, + "object": { + "type": "string" + }, + "usage": { + "$ref": "#/definitions/schema.OpenAIUsage" + } + } + }, + "schema.OpenAIUsage": { + "type": "object", + "properties": { + "completion_tokens": { + "type": "integer" + }, + "prompt_tokens": { + "type": "integer" + }, + "total_tokens": { + "type": "integer" + } + } + }, + "schema.TTSRequest": { + "type": "object", + "properties": { + "backend": { + "type": "string" + }, + "input": { + "type": "string" + }, + "model": { + "type": "string" + }, + "voice": { + "type": "string" + } + } + }, + "schema.ToolCall": { + 
"type": "object", + "properties": { + "function": { + "$ref": "#/definitions/schema.FunctionCall" + }, + "id": { + "type": "string" + }, + "index": { + "type": "integer" + }, + "type": { + "type": "string" + } + } + } + }, + "securityDefinitions": { + "BearerAuth": { + "type": "apiKey", + "name": "Authorization", + "in": "header" + } + } +}` + +// SwaggerInfo holds exported Swagger Info so clients can modify it +var SwaggerInfo = &swag.Spec{ + Version: "2.0.0", + Host: "localhost:8080", + BasePath: "/", + Schemes: []string{}, + Title: "LocalAI API", + Description: "The OpenAI REST API.", + InfoInstanceName: "swagger", + SwaggerTemplate: docTemplate, + LeftDelim: "{{", + RightDelim: "}}", +} + +func init() { + swag.Register(SwaggerInfo.InstanceName(), SwaggerInfo) +} diff --git a/swagger/swagger.json b/swagger/swagger.json new file mode 100644 index 00000000..c9088d93 --- /dev/null +++ b/swagger/swagger.json @@ -0,0 +1,776 @@ +{ + "swagger": "2.0", + "info": { + "description": "The OpenAI REST API.", + "title": "LocalAI API", + "contact": { + "name": "OpenAI Support", + "url": "https://help.openai.com/" + }, + "license": { + "name": "MIT", + "url": "https://github.com/openai/openai-openapi/blob/master/LICENSE" + }, + "version": "2.0.0" + }, + "host": "localhost:8080", + "basePath": "/", + "paths": { + "/v1/assistants": { + "post": { + "summary": "Create an assistant with a model and instructions.", + "parameters": [ + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/openai.AssistantRequest" + } + } + ], + "responses": { + "200": { + "description": "Response", + "schema": { + "$ref": "#/definitions/openai.Assistant" + } + } + } + } + }, + "/v1/audio/speech": { + "post": { + "summary": "Generates audio from the input text.", + "parameters": [ + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.TTSRequest" + } + } + ], + "responses": { + "200": { + "description": "Response", + "schema": { + "type": "string" + } + } + } + } + }, + "/v1/audio/transcriptions": { + "post": { + "consumes": [ + "multipart/form-data" + ], + "summary": "Transcribes audio into the input language.", + "parameters": [ + { + "type": "string", + "description": "model", + "name": "model", + "in": "formData", + "required": true + }, + { + "type": "file", + "description": "file", + "name": "file", + "in": "formData", + "required": true + } + ], + "responses": { + "200": { + "description": "Response", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/v1/chat/completions": { + "post": { + "summary": "Generate a chat completions for a given prompt and model.", + "parameters": [ + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.OpenAIRequest" + } + } + ], + "responses": { + "200": { + "description": "Response", + "schema": { + "$ref": "#/definitions/schema.OpenAIResponse" + } + } + } + } + }, + "/v1/completions": { + "post": { + "summary": "Generate completions for a given prompt and model.", + "parameters": [ + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.OpenAIRequest" + } + } + ], + "responses": { + "200": { + "description": "Response", + "schema": { + "$ref": "#/definitions/schema.OpenAIResponse" + } + } + } + } + }, + 
"/v1/embeddings": { + "post": { + "summary": "Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.", + "parameters": [ + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.OpenAIRequest" + } + } + ], + "responses": { + "200": { + "description": "Response", + "schema": { + "$ref": "#/definitions/schema.OpenAIResponse" + } + } + } + } + }, + "/v1/images/generations": { + "post": { + "summary": "Creates an image given a prompt.", + "parameters": [ + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.OpenAIRequest" + } + } + ], + "responses": { + "200": { + "description": "Response", + "schema": { + "$ref": "#/definitions/schema.OpenAIResponse" + } + } + } + } + }, + "/v1/text-to-speech/{voice-id}": { + "post": { + "summary": "Generates audio from the input text.", + "parameters": [ + { + "type": "string", + "description": "Account ID", + "name": "voice-id", + "in": "path", + "required": true + }, + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.TTSRequest" + } + } + ], + "responses": { + "200": { + "description": "Response", + "schema": { + "type": "string" + } + } + } + } + } + }, + "definitions": { + "grammar.Argument": { + "type": "object", + "properties": { + "properties": { + "type": "object", + "additionalProperties": true + }, + "type": { + "type": "string" + } + } + }, + "grammar.Function": { + "type": "object", + "properties": { + "description": { + "type": "string" + }, + "name": { + "type": "string" + }, + "parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "grammar.FunctionName": { + "type": "object", + "properties": { + "const": { + "type": "string" + } + } + }, + "grammar.Item": { + "type": "object", + "properties": { + "properties": { + "$ref": "#/definitions/grammar.Properties" + }, + "type": { + "type": "string" + } + } + }, + "grammar.JSONFunctionStructure": { + "type": "object", + "properties": { + "$defs": { + "type": "object", + "additionalProperties": true + }, + "anyOf": { + "type": "array", + "items": { + "$ref": "#/definitions/grammar.Item" + } + }, + "oneOf": { + "type": "array", + "items": { + "$ref": "#/definitions/grammar.Item" + } + } + } + }, + "grammar.Properties": { + "type": "object", + "properties": { + "arguments": { + "$ref": "#/definitions/grammar.Argument" + }, + "function": { + "$ref": "#/definitions/grammar.FunctionName" + } + } + }, + "grammar.Tool": { + "type": "object", + "properties": { + "function": { + "$ref": "#/definitions/grammar.Function" + }, + "type": { + "type": "string" + } + } + }, + "openai.Assistant": { + "type": "object", + "properties": { + "created": { + "description": "The time at which the assistant was created.", + "type": "integer" + }, + "description": { + "description": "The description of the assistant.", + "type": "string" + }, + "file_ids": { + "description": "A list of file IDs attached to this assistant.", + "type": "array", + "items": { + "type": "string" + } + }, + "id": { + "description": "The unique identifier of the assistant.", + "type": "string" + }, + "instructions": { + "description": "The system instructions that the assistant uses.", + "type": "string" + }, + "metadata": { + "description": "Set of key-value pairs attached to the assistant.", + "type": 
"object", + "additionalProperties": { + "type": "string" + } + }, + "model": { + "description": "The model ID used by the assistant.", + "type": "string" + }, + "name": { + "description": "The name of the assistant.", + "type": "string" + }, + "object": { + "description": "Object type, which is \"assistant\".", + "type": "string" + }, + "tools": { + "description": "A list of tools enabled on the assistant.", + "type": "array", + "items": { + "$ref": "#/definitions/openai.Tool" + } + } + } + }, + "openai.AssistantRequest": { + "type": "object", + "properties": { + "description": { + "type": "string" + }, + "file_ids": { + "type": "array", + "items": { + "type": "string" + } + }, + "instructions": { + "type": "string" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "model": { + "type": "string" + }, + "name": { + "type": "string" + }, + "tools": { + "type": "array", + "items": { + "$ref": "#/definitions/openai.Tool" + } + } + } + }, + "openai.Tool": { + "type": "object", + "properties": { + "type": { + "$ref": "#/definitions/openai.ToolType" + } + } + }, + "openai.ToolType": { + "type": "string", + "enum": [ + "code_interpreter", + "retrieval", + "function" + ], + "x-enum-varnames": [ + "CodeInterpreter", + "Retrieval", + "Function" + ] + }, + "schema.ChatCompletionResponseFormat": { + "type": "object", + "properties": { + "type": { + "type": "string" + } + } + }, + "schema.Choice": { + "type": "object", + "properties": { + "delta": { + "$ref": "#/definitions/schema.Message" + }, + "finish_reason": { + "type": "string" + }, + "index": { + "type": "integer" + }, + "message": { + "$ref": "#/definitions/schema.Message" + }, + "text": { + "type": "string" + } + } + }, + "schema.FunctionCall": { + "type": "object", + "properties": { + "arguments": { + "type": "string" + }, + "name": { + "type": "string" + } + } + }, + "schema.Item": { + "type": "object", + "properties": { + "b64_json": { + "type": "string" + }, + "embedding": { + "type": "array", + "items": { + "type": "number" + } + }, + "index": { + "type": "integer" + }, + "object": { + "type": "string" + }, + "url": { + "description": "Images", + "type": "string" + } + } + }, + "schema.Message": { + "type": "object", + "properties": { + "content": { + "description": "The message content" + }, + "function_call": { + "description": "A result of a function call" + }, + "name": { + "description": "The message name (used for tools calls)", + "type": "string" + }, + "role": { + "description": "The message role", + "type": "string" + }, + "string_content": { + "type": "string" + }, + "string_images": { + "type": "array", + "items": { + "type": "string" + } + }, + "tool_calls": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.ToolCall" + } + } + } + }, + "schema.OpenAIRequest": { + "type": "object", + "required": [ + "file" + ], + "properties": { + "backend": { + "type": "string" + }, + "batch": { + "description": "Custom parameters - not present in the OpenAI API", + "type": "integer" + }, + "clip_skip": { + "description": "Diffusers", + "type": "integer" + }, + "echo": { + "type": "boolean" + }, + "file": { + "description": "whisper", + "type": "string" + }, + "frequency_penalty": { + "type": "number" + }, + "function_call": { + "description": "might be a string or an object" + }, + "functions": { + "description": "A list of available functions to call", + "type": "array", + "items": { + "$ref": "#/definitions/grammar.Function" + } + }, + "grammar": { + "description": "A grammar to 
constrain the LLM output", + "type": "string" + }, + "grammar_json_functions": { + "$ref": "#/definitions/grammar.JSONFunctionStructure" + }, + "ignore_eos": { + "type": "boolean" + }, + "input": {}, + "instruction": { + "description": "Edit endpoint", + "type": "string" + }, + "language": { + "description": "Also part of the OpenAI official spec", + "type": "string" + }, + "max_tokens": { + "type": "integer" + }, + "messages": { + "description": "Messages is read only by chat/completion API calls", + "type": "array", + "items": { + "$ref": "#/definitions/schema.Message" + } + }, + "mode": { + "description": "Image (not supported by OpenAI)", + "type": "integer" + }, + "model": { + "description": "Also part of the OpenAI official spec", + "type": "string" + }, + "model_base_name": { + "description": "AutoGPTQ", + "type": "string" + }, + "n": { + "description": "Also part of the OpenAI official spec. use it for returning multiple results", + "type": "integer" + }, + "n_keep": { + "type": "integer" + }, + "negative_prompt": { + "type": "string" + }, + "negative_prompt_scale": { + "type": "number" + }, + "presence_penalty": { + "type": "number" + }, + "prompt": { + "description": "Prompt is read only by completion/image API calls" + }, + "repeat_penalty": { + "type": "number" + }, + "response_format": { + "description": "whisper/image", + "allOf": [ + { + "$ref": "#/definitions/schema.ChatCompletionResponseFormat" + } + ] + }, + "rope_freq_base": { + "type": "number" + }, + "rope_freq_scale": { + "type": "number" + }, + "seed": { + "type": "integer" + }, + "size": { + "description": "image", + "type": "string" + }, + "step": { + "type": "integer" + }, + "stop": {}, + "stream": { + "type": "boolean" + }, + "temperature": { + "type": "number" + }, + "tfz": { + "type": "number" + }, + "tokenizer": { + "description": "RWKV (?)", + "type": "string" + }, + "tool_choice": {}, + "tools": { + "type": "array", + "items": { + "$ref": "#/definitions/grammar.Tool" + } + }, + "top_k": { + "type": "integer" + }, + "top_p": { + "description": "Common options between all the API calls, part of the OpenAI spec", + "type": "number" + }, + "typical_p": { + "type": "number" + }, + "use_fast_tokenizer": { + "description": "AutoGPTQ", + "type": "boolean" + } + } + }, + "schema.OpenAIResponse": { + "type": "object", + "properties": { + "choices": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.Choice" + } + }, + "created": { + "type": "integer" + }, + "data": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.Item" + } + }, + "id": { + "type": "string" + }, + "model": { + "type": "string" + }, + "object": { + "type": "string" + }, + "usage": { + "$ref": "#/definitions/schema.OpenAIUsage" + } + } + }, + "schema.OpenAIUsage": { + "type": "object", + "properties": { + "completion_tokens": { + "type": "integer" + }, + "prompt_tokens": { + "type": "integer" + }, + "total_tokens": { + "type": "integer" + } + } + }, + "schema.TTSRequest": { + "type": "object", + "properties": { + "backend": { + "type": "string" + }, + "input": { + "type": "string" + }, + "model": { + "type": "string" + }, + "voice": { + "type": "string" + } + } + }, + "schema.ToolCall": { + "type": "object", + "properties": { + "function": { + "$ref": "#/definitions/schema.FunctionCall" + }, + "id": { + "type": "string" + }, + "index": { + "type": "integer" + }, + "type": { + "type": "string" + } + } + } + }, + "securityDefinitions": { + "BearerAuth": { + "type": "apiKey", + "name": "Authorization", + "in": "header" + } + } 
+} \ No newline at end of file diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml new file mode 100644 index 00000000..b33e1fab --- /dev/null +++ b/swagger/swagger.yaml @@ -0,0 +1,519 @@ +basePath: / +definitions: + grammar.Argument: + properties: + properties: + additionalProperties: true + type: object + type: + type: string + type: object + grammar.Function: + properties: + description: + type: string + name: + type: string + parameters: + additionalProperties: true + type: object + type: object + grammar.FunctionName: + properties: + const: + type: string + type: object + grammar.Item: + properties: + properties: + $ref: '#/definitions/grammar.Properties' + type: + type: string + type: object + grammar.JSONFunctionStructure: + properties: + $defs: + additionalProperties: true + type: object + anyOf: + items: + $ref: '#/definitions/grammar.Item' + type: array + oneOf: + items: + $ref: '#/definitions/grammar.Item' + type: array + type: object + grammar.Properties: + properties: + arguments: + $ref: '#/definitions/grammar.Argument' + function: + $ref: '#/definitions/grammar.FunctionName' + type: object + grammar.Tool: + properties: + function: + $ref: '#/definitions/grammar.Function' + type: + type: string + type: object + openai.Assistant: + properties: + created: + description: The time at which the assistant was created. + type: integer + description: + description: The description of the assistant. + type: string + file_ids: + description: A list of file IDs attached to this assistant. + items: + type: string + type: array + id: + description: The unique identifier of the assistant. + type: string + instructions: + description: The system instructions that the assistant uses. + type: string + metadata: + additionalProperties: + type: string + description: Set of key-value pairs attached to the assistant. + type: object + model: + description: The model ID used by the assistant. + type: string + name: + description: The name of the assistant. + type: string + object: + description: Object type, which is "assistant". + type: string + tools: + description: A list of tools enabled on the assistant. 
+ items: + $ref: '#/definitions/openai.Tool' + type: array + type: object + openai.AssistantRequest: + properties: + description: + type: string + file_ids: + items: + type: string + type: array + instructions: + type: string + metadata: + additionalProperties: + type: string + type: object + model: + type: string + name: + type: string + tools: + items: + $ref: '#/definitions/openai.Tool' + type: array + type: object + openai.Tool: + properties: + type: + $ref: '#/definitions/openai.ToolType' + type: object + openai.ToolType: + enum: + - code_interpreter + - retrieval + - function + type: string + x-enum-varnames: + - CodeInterpreter + - Retrieval + - Function + schema.ChatCompletionResponseFormat: + properties: + type: + type: string + type: object + schema.Choice: + properties: + delta: + $ref: '#/definitions/schema.Message' + finish_reason: + type: string + index: + type: integer + message: + $ref: '#/definitions/schema.Message' + text: + type: string + type: object + schema.FunctionCall: + properties: + arguments: + type: string + name: + type: string + type: object + schema.Item: + properties: + b64_json: + type: string + embedding: + items: + type: number + type: array + index: + type: integer + object: + type: string + url: + description: Images + type: string + type: object + schema.Message: + properties: + content: + description: The message content + function_call: + description: A result of a function call + name: + description: The message name (used for tools calls) + type: string + role: + description: The message role + type: string + string_content: + type: string + string_images: + items: + type: string + type: array + tool_calls: + items: + $ref: '#/definitions/schema.ToolCall' + type: array + type: object + schema.OpenAIRequest: + properties: + backend: + type: string + batch: + description: Custom parameters - not present in the OpenAI API + type: integer + clip_skip: + description: Diffusers + type: integer + echo: + type: boolean + file: + description: whisper + type: string + frequency_penalty: + type: number + function_call: + description: might be a string or an object + functions: + description: A list of available functions to call + items: + $ref: '#/definitions/grammar.Function' + type: array + grammar: + description: A grammar to constrain the LLM output + type: string + grammar_json_functions: + $ref: '#/definitions/grammar.JSONFunctionStructure' + ignore_eos: + type: boolean + input: {} + instruction: + description: Edit endpoint + type: string + language: + description: Also part of the OpenAI official spec + type: string + max_tokens: + type: integer + messages: + description: Messages is read only by chat/completion API calls + items: + $ref: '#/definitions/schema.Message' + type: array + mode: + description: Image (not supported by OpenAI) + type: integer + model: + description: Also part of the OpenAI official spec + type: string + model_base_name: + description: AutoGPTQ + type: string + "n": + description: Also part of the OpenAI official spec. 
use it for returning multiple + results + type: integer + n_keep: + type: integer + negative_prompt: + type: string + negative_prompt_scale: + type: number + presence_penalty: + type: number + prompt: + description: Prompt is read only by completion/image API calls + repeat_penalty: + type: number + response_format: + allOf: + - $ref: '#/definitions/schema.ChatCompletionResponseFormat' + description: whisper/image + rope_freq_base: + type: number + rope_freq_scale: + type: number + seed: + type: integer + size: + description: image + type: string + step: + type: integer + stop: {} + stream: + type: boolean + temperature: + type: number + tfz: + type: number + tokenizer: + description: RWKV (?) + type: string + tool_choice: {} + tools: + items: + $ref: '#/definitions/grammar.Tool' + type: array + top_k: + type: integer + top_p: + description: Common options between all the API calls, part of the OpenAI + spec + type: number + typical_p: + type: number + use_fast_tokenizer: + description: AutoGPTQ + type: boolean + required: + - file + type: object + schema.OpenAIResponse: + properties: + choices: + items: + $ref: '#/definitions/schema.Choice' + type: array + created: + type: integer + data: + items: + $ref: '#/definitions/schema.Item' + type: array + id: + type: string + model: + type: string + object: + type: string + usage: + $ref: '#/definitions/schema.OpenAIUsage' + type: object + schema.OpenAIUsage: + properties: + completion_tokens: + type: integer + prompt_tokens: + type: integer + total_tokens: + type: integer + type: object + schema.TTSRequest: + properties: + backend: + type: string + input: + type: string + model: + type: string + voice: + type: string + type: object + schema.ToolCall: + properties: + function: + $ref: '#/definitions/schema.FunctionCall' + id: + type: string + index: + type: integer + type: + type: string + type: object +host: localhost:8080 +info: + contact: + name: OpenAI Support + url: https://help.openai.com/ + description: The OpenAI REST API. + license: + name: MIT + url: https://github.com/openai/openai-openapi/blob/master/LICENSE + title: LocalAI API + version: 2.0.0 +paths: + /v1/assistants: + post: + parameters: + - description: query params + in: body + name: request + required: true + schema: + $ref: '#/definitions/openai.AssistantRequest' + responses: + "200": + description: Response + schema: + $ref: '#/definitions/openai.Assistant' + summary: Create an assistant with a model and instructions. + /v1/audio/speech: + post: + parameters: + - description: query params + in: body + name: request + required: true + schema: + $ref: '#/definitions/schema.TTSRequest' + responses: + "200": + description: Response + schema: + type: string + summary: Generates audio from the input text. + /v1/audio/transcriptions: + post: + consumes: + - multipart/form-data + parameters: + - description: model + in: formData + name: model + required: true + type: string + - description: file + in: formData + name: file + required: true + type: file + responses: + "200": + description: Response + schema: + additionalProperties: + type: string + type: object + summary: Transcribes audio into the input language. + /v1/chat/completions: + post: + parameters: + - description: query params + in: body + name: request + required: true + schema: + $ref: '#/definitions/schema.OpenAIRequest' + responses: + "200": + description: Response + schema: + $ref: '#/definitions/schema.OpenAIResponse' + summary: Generate a chat completions for a given prompt and model. 
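+      # Illustrative usage (comment only, not part of the generated spec): per the
+      # path item above, this route accepts an OpenAI-style schema.OpenAIRequest
+      # JSON body and returns a schema.OpenAIResponse. The host comes from the
+      # spec's `host:` field; the model name below is a placeholder for whatever
+      # model the LocalAI instance has loaded.
+      #   curl http://localhost:8080/v1/chat/completions \
+      #     -H "Content-Type: application/json" \
+      #     -d '{"model": "<your-loaded-model>", "messages": [{"role": "user", "content": "Hello"}]}'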
+ /v1/completions: + post: + parameters: + - description: query params + in: body + name: request + required: true + schema: + $ref: '#/definitions/schema.OpenAIRequest' + responses: + "200": + description: Response + schema: + $ref: '#/definitions/schema.OpenAIResponse' + summary: Generate completions for a given prompt and model. + /v1/embeddings: + post: + parameters: + - description: query params + in: body + name: request + required: true + schema: + $ref: '#/definitions/schema.OpenAIRequest' + responses: + "200": + description: Response + schema: + $ref: '#/definitions/schema.OpenAIResponse' + summary: Get a vector representation of a given input that can be easily consumed + by machine learning models and algorithms. + /v1/images/generations: + post: + parameters: + - description: query params + in: body + name: request + required: true + schema: + $ref: '#/definitions/schema.OpenAIRequest' + responses: + "200": + description: Response + schema: + $ref: '#/definitions/schema.OpenAIResponse' + summary: Creates an image given a prompt. + /v1/text-to-speech/{voice-id}: + post: + parameters: + - description: Account ID + in: path + name: voice-id + required: true + type: string + - description: query params + in: body + name: request + required: true + schema: + $ref: '#/definitions/schema.TTSRequest' + responses: + "200": + description: Response + schema: + type: string + summary: Generates audio from the input text. +securityDefinitions: + BearerAuth: + in: header + name: Authorization + type: apiKey +swagger: "2.0" From a7fc89c2078f2c77e1a2e533c3faff59f9043ed6 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 29 Mar 2024 22:29:50 +0100 Subject: [PATCH 0212/2895] :arrow_up: Update ggerganov/whisper.cpp (#1927) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 440f5158..6c02b80e 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=fc366b807a17dc05813a6fcc13c8c4dfd442fa6a +WHISPER_CPP_VERSION?=1e8f28c42a1472ae7c49d0502ea06e2f5bc29a69 # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From 93702e39d420a39c7e4afa9fcd925e9eeb10492c Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Fri, 29 Mar 2024 16:32:40 -0500 Subject: [PATCH 0213/2895] feat(build): adjust number of parallel make jobs (#1915) * feat(build): adjust number of parallel make jobs * fix: update make on MacOS from brew to support --output-sync argument * fix: cache grpc with version as part of key to improve validity of cache hits * fix: use gmake for tests-apple to use the updated GNU make version * fix: actually use the new make version for tests-apple * feat: parallelize tests-extra * feat: attempt to cache grpc build for docker images * fix: don't quote GRPC version * fix: don't cache go modules, we have limited cache space, better used elsewhere * fix: release with the same version of go that we test with * fix: don't fail on exporting cache layers * fix: remove deprecated BUILD_GRPC docker arg from Makefile --- .github/workflows/image-pr.yml | 13 ++++++++--- .github/workflows/image.yml | 26 ++++++++++++++++++++-- .github/workflows/image_build.yml | 37 ++++++++++++++++++++++++++++--- .github/workflows/release.yaml | 20 +++++++++++------ 
.github/workflows/test-extra.yml | 36 +++++++++++++++--------------- .github/workflows/test.yml | 23 ++++++++++++------- Dockerfile | 37 ++++++++++++++++++++++++------- Makefile | 6 ++++- 8 files changed, 148 insertions(+), 50 deletions(-) diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index aa59188c..b703b16d 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -22,7 +22,7 @@ jobs: platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} - makeflags: "--jobs=3 --output-sync=target" + makeflags: ${{ matrix.makeflags }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -42,6 +42,7 @@ jobs: image-type: 'extras' runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" + makeflags: "--jobs=3 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" @@ -52,6 +53,7 @@ jobs: image-type: 'extras' runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" + makeflags: "--jobs=3 --output-sync=target" - build-type: 'hipblas' platforms: 'linux/amd64' tag-latest: 'false' @@ -60,6 +62,7 @@ jobs: image-type: 'extras' base-image: "rocm/dev-ubuntu-22.04:6.0-complete" runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' @@ -68,6 +71,7 @@ jobs: ffmpeg: 'true' image-type: 'extras' runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" core-image-build: uses: ./.github/workflows/image_build.yml with: @@ -81,7 +85,7 @@ jobs: platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} - makeflags: "--jobs=3 --output-sync=target" + makeflags: ${{ matrix.makeflags }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -98,6 +102,7 @@ jobs: image-type: 'core' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" + makeflags: "--jobs=5 --output-sync=target" - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' @@ -106,6 +111,7 @@ jobs: ffmpeg: 'true' image-type: 'core' runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" @@ -115,4 +121,5 @@ jobs: ffmpeg: 'true' image-type: 'core' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" \ No newline at end of file + base-image: "ubuntu:22.04" + makeflags: "--jobs=5 --output-sync=target" \ No newline at end of file diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 40deb0ec..79a38fc5 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -27,7 +27,7 @@ jobs: runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} aio: ${{ matrix.aio }} - makeflags: "--jobs=3 --output-sync=target" + makeflags: ${{ matrix.makeflags }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -49,6 +49,7 @@ jobs: image-type: 'extras' runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" + makeflags: "--jobs=3 --output-sync=target" - build-type: '' platforms: 'linux/amd64' tag-latest: 'auto' @@ -57,6 +58,7 @@ jobs: image-type: 'extras' runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" + makeflags: "--jobs=3 --output-sync=target" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -67,6 +69,7 @@ jobs: image-type: 'extras' runs-on: 'arc-runner-set' base-image: 
"ubuntu:22.04" + makeflags: "--jobs=3 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" @@ -77,6 +80,7 @@ jobs: image-type: 'extras' runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" + makeflags: "--jobs=3 --output-sync=target" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -88,6 +92,7 @@ jobs: runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" aio: "-aio-gpu-nvidia-cuda-11" + makeflags: "--jobs=3 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" @@ -99,6 +104,7 @@ jobs: runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" aio: "-aio-gpu-nvidia-cuda-12" + makeflags: "--jobs=3 --output-sync=target" - build-type: '' #platforms: 'linux/amd64,linux/arm64' platforms: 'linux/amd64' @@ -108,6 +114,7 @@ jobs: image-type: 'extras' base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" - build-type: 'hipblas' platforms: 'linux/amd64' tag-latest: 'auto' @@ -117,6 +124,7 @@ jobs: aio: "-aio-gpu-hipblas" base-image: "rocm/dev-ubuntu-22.04:6.0-complete" runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" - build-type: 'hipblas' platforms: 'linux/amd64' tag-latest: 'false' @@ -125,6 +133,7 @@ jobs: image-type: 'extras' base-image: "rocm/dev-ubuntu-22.04:6.0-complete" runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'auto' @@ -134,6 +143,7 @@ jobs: image-type: 'extras' runs-on: 'arc-runner-set' aio: "-aio-gpu-intel-f16" + makeflags: "--jobs=3 --output-sync=target" - build-type: 'sycl_f32' platforms: 'linux/amd64' tag-latest: 'auto' @@ -143,6 +153,7 @@ jobs: image-type: 'extras' runs-on: 'arc-runner-set' aio: "-aio-gpu-intel-f32" + makeflags: "--jobs=3 --output-sync=target" # Core images - build-type: 'sycl_f16' platforms: 'linux/amd64' @@ -152,6 +163,7 @@ jobs: ffmpeg: 'false' image-type: 'core' runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" - build-type: 'sycl_f32' platforms: 'linux/amd64' tag-latest: 'false' @@ -160,6 +172,7 @@ jobs: ffmpeg: 'false' image-type: 'core' runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' @@ -168,6 +181,7 @@ jobs: ffmpeg: 'true' image-type: 'core' runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" - build-type: 'sycl_f32' platforms: 'linux/amd64' tag-latest: 'false' @@ -176,6 +190,7 @@ jobs: ffmpeg: 'true' image-type: 'core' runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" - build-type: 'hipblas' platforms: 'linux/amd64' tag-latest: 'false' @@ -184,6 +199,7 @@ jobs: image-type: 'core' base-image: "rocm/dev-ubuntu-22.04:6.0-complete" runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" - build-type: 'hipblas' platforms: 'linux/amd64' tag-latest: 'false' @@ -192,6 +208,7 @@ jobs: image-type: 'core' base-image: "rocm/dev-ubuntu-22.04:6.0-complete" runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" core-image-build: uses: ./.github/workflows/image_build.yml @@ -207,7 +224,7 @@ jobs: runs-on: ${{ matrix.runs-on }} aio: ${{ matrix.aio }} base-image: ${{ matrix.base-image }} - makeflags: "--jobs=3 --output-sync=target" + makeflags: ${{ matrix.makeflags }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -225,6 +242,7 @@ jobs: base-image: "ubuntu:22.04" runs-on: 
'ubuntu-latest' aio: "-aio-cpu" + makeflags: "--jobs=5 --output-sync=target" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -235,6 +253,7 @@ jobs: image-type: 'core' base-image: "ubuntu:22.04" runs-on: 'ubuntu-latest' + makeflags: "--jobs=5 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" @@ -245,6 +264,7 @@ jobs: image-type: 'core' base-image: "ubuntu:22.04" runs-on: 'ubuntu-latest' + makeflags: "--jobs=5 --output-sync=target" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -255,6 +275,7 @@ jobs: image-type: 'core' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" + makeflags: "--jobs=5 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" @@ -265,3 +286,4 @@ jobs: image-type: 'core' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" + makeflags: "--jobs=5 --output-sync=target" diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 659f85de..d07df441 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -49,7 +49,7 @@ on: makeflags: description: 'Make Flags' required: false - default: '' + default: '--jobs=3 --output-sync=target' type: string aio: description: 'AIO Image Name' @@ -79,6 +79,7 @@ jobs: && sudo apt-get install -y git - name: Checkout uses: actions/checkout@v4 + - name: Release space from worker if: inputs.runs-on == 'ubuntu-latest' run: | @@ -120,6 +121,7 @@ jobs: sudo rm -rf "/usr/local/share/boost" || true sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true df -h + - name: Docker meta id: meta uses: docker/metadata-action@v5 @@ -134,6 +136,7 @@ jobs: flavor: | latest=${{ inputs.tag-latest }} suffix=${{ inputs.tag-suffix }} + - name: Docker meta AIO (quay.io) if: inputs.aio != '' id: meta_aio @@ -147,6 +150,7 @@ jobs: flavor: | latest=${{ inputs.tag-latest }} suffix=${{ inputs.aio }} + - name: Docker meta AIO (dockerhub) if: inputs.aio != '' id: meta_aio_dockerhub @@ -160,6 +164,7 @@ jobs: flavor: | latest=${{ inputs.tag-latest }} suffix=${{ inputs.aio }} + - name: Set up QEMU uses: docker/setup-qemu-action@master with: @@ -184,6 +189,25 @@ jobs: username: ${{ secrets.quayUsername }} password: ${{ secrets.quayPassword }} + - name: Cache GRPC + uses: docker/build-push-action@v5 + with: + builder: ${{ steps.buildx.outputs.name }} + build-args: | + IMAGE_TYPE=${{ inputs.image-type }} + BASE_IMAGE=${{ inputs.base-image }} + MAKEFLAGS=${{ inputs.makeflags }} + GRPC_VERSION=v1.58.0 + context: . + file: ./Dockerfile + cache-from: type=gha + cache-to: type=gha,ignore-error=true + target: grpc + platforms: ${{ inputs.platforms }} + push: false + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + - name: Build and push uses: docker/build-push-action@v5 with: @@ -198,18 +222,20 @@ jobs: MAKEFLAGS=${{ inputs.makeflags }} context: . 
file: ./Dockerfile + cache-from: type=gha platforms: ${{ inputs.platforms }} push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - - - name: Inspect image + + - name: Inspect image if: github.event_name != 'pull_request' run: | docker pull localai/localai:${{ steps.meta.outputs.version }} docker image inspect localai/localai:${{ steps.meta.outputs.version }} docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} docker image inspect quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} + - name: Build and push AIO image if: inputs.aio != '' uses: docker/build-push-action@v5 @@ -217,12 +243,14 @@ jobs: builder: ${{ steps.buildx.outputs.name }} build-args: | BASE_IMAGE=quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} + MAKEFLAGS=${{ inputs.makeflags }} context: . file: ./Dockerfile.aio platforms: ${{ inputs.platforms }} push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta_aio.outputs.tags }} labels: ${{ steps.meta_aio.outputs.labels }} + - name: Build and push AIO image (dockerhub) if: inputs.aio != '' uses: docker/build-push-action@v5 @@ -230,15 +258,18 @@ jobs: builder: ${{ steps.buildx.outputs.name }} build-args: | BASE_IMAGE=localai/localai:${{ steps.meta.outputs.version }} + MAKEFLAGS=${{ inputs.makeflags }} context: . file: ./Dockerfile.aio platforms: ${{ inputs.platforms }} push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta_aio_dockerhub.outputs.tags }} labels: ${{ steps.meta_aio_dockerhub.outputs.labels }} + - name: job summary run: | echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY + - name: job summary(AIO) if: inputs.aio != '' run: | diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 6ac816ee..1d749189 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -2,6 +2,9 @@ name: Build and Release on: push +env: + GRPC_VERSION: v1.58.0 + permissions: contents: write @@ -32,7 +35,8 @@ jobs: submodules: true - uses: actions/setup-go@v4 with: - go-version: '>=1.21.0' + go-version: '1.21.x' + cache: false - name: Dependencies run: | sudo apt-get update @@ -54,17 +58,17 @@ jobs: uses: actions/cache@v3 with: path: grpc - key: ${{ runner.os }}-grpc + key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }} - name: Build grpc if: steps.cache-grpc.outputs.cache-hit != 'true' run: | - git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ + git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \ -DgRPC_BUILD_TESTS=OFF \ - ../.. && sudo make -j12 + ../.. 
&& sudo make --jobs 5 --output-sync=target - name: Install gRPC run: | - cd grpc && cd cmake/build && sudo make -j12 install + cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install - name: Build id: build env: @@ -98,7 +102,8 @@ jobs: submodules: true - uses: actions/setup-go@v4 with: - go-version: '>=1.21.0' + go-version: '1.21.x' + cache: false - name: Dependencies run: | sudo apt-get install -y --no-install-recommends libopencv-dev @@ -135,7 +140,8 @@ jobs: submodules: true - uses: actions/setup-go@v4 with: - go-version: '>=1.21.0' + go-version: '1.21.x' + cache: false - name: Dependencies run: | brew install protobuf grpc diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index 5f61835d..6f92c806 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -40,8 +40,8 @@ jobs: - name: Test transformers run: | export PATH=$PATH:/opt/conda/bin - make -C backend/python/transformers - make -C backend/python/transformers test + make --jobs=5 --output-sync=target -C backend/python/transformers + make --jobs=5 --output-sync=target -C backend/python/transformers test tests-sentencetransformers: runs-on: ubuntu-latest @@ -69,8 +69,8 @@ jobs: - name: Test sentencetransformers run: | export PATH=$PATH:/opt/conda/bin - make -C backend/python/sentencetransformers - make -C backend/python/sentencetransformers test + make --jobs=5 --output-sync=target -C backend/python/sentencetransformers + make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test tests-diffusers: runs-on: ubuntu-latest @@ -98,8 +98,8 @@ jobs: - name: Test diffusers run: | export PATH=$PATH:/opt/conda/bin - make -C backend/python/diffusers - make -C backend/python/diffusers test + make --jobs=5 --output-sync=target -C backend/python/diffusers + make --jobs=5 --output-sync=target -C backend/python/diffusers test tests-transformers-musicgen: @@ -128,8 +128,8 @@ jobs: - name: Test transformers-musicgen run: | export PATH=$PATH:/opt/conda/bin - make -C backend/python/transformers-musicgen - make -C backend/python/transformers-musicgen test + make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen + make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test @@ -159,8 +159,8 @@ jobs: - name: Test petals run: | export PATH=$PATH:/opt/conda/bin - make -C backend/python/petals - make -C backend/python/petals test + make --jobs=5 --output-sync=target -C backend/python/petals + make --jobs=5 --output-sync=target -C backend/python/petals test @@ -230,8 +230,8 @@ jobs: # - name: Test bark # run: | # export PATH=$PATH:/opt/conda/bin - # make -C backend/python/bark - # make -C backend/python/bark test + # make --jobs=5 --output-sync=target -C backend/python/bark + # make --jobs=5 --output-sync=target -C backend/python/bark test # Below tests needs GPU. 
Commented out for now @@ -260,8 +260,8 @@ jobs: # - name: Test vllm # run: | # export PATH=$PATH:/opt/conda/bin - # make -C backend/python/vllm - # make -C backend/python/vllm test + # make --jobs=5 --output-sync=target -C backend/python/vllm + # make --jobs=5 --output-sync=target -C backend/python/vllm test tests-vallex: runs-on: ubuntu-latest steps: @@ -286,8 +286,8 @@ jobs: - name: Test vall-e-x run: | export PATH=$PATH:/opt/conda/bin - make -C backend/python/vall-e-x - make -C backend/python/vall-e-x test + make --jobs=5 --output-sync=target -C backend/python/vall-e-x + make --jobs=5 --output-sync=target -C backend/python/vall-e-x test tests-coqui: runs-on: ubuntu-latest @@ -313,5 +313,5 @@ jobs: - name: Test coqui run: | export PATH=$PATH:/opt/conda/bin - make -C backend/python/coqui - make -C backend/python/coqui test + make --jobs=5 --output-sync=target -C backend/python/coqui + make --jobs=5 --output-sync=target -C backend/python/coqui test diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 203aeeca..95d10862 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,6 +9,9 @@ on: tags: - '*' +env: + GRPC_VERSION: v1.58.0 + concurrency: group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }} cancel-in-progress: true @@ -60,6 +63,7 @@ jobs: uses: actions/setup-go@v4 with: go-version: ${{ matrix.go-version }} + cache: false # You can test your matrix by printing the current Go version - name: Display Go version run: go version @@ -91,20 +95,20 @@ jobs: uses: actions/cache@v3 with: path: grpc - key: ${{ runner.os }}-grpc + key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }} - name: Build grpc if: steps.cache-grpc.outputs.cache-hit != 'true' run: | - git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ + git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \ cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \ -DgRPC_BUILD_TESTS=OFF \ - ../.. && sudo make -j12 + ../.. && sudo make --jobs 5 - name: Install gRPC run: | - cd grpc && cd cmake/build && sudo make -j12 install + cd grpc && cd cmake/build && sudo make --jobs 5 install - name: Test run: | - GO_TAGS="stablediffusion tts" make test + GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} uses: mxschmitt/action-tmate@v3 @@ -151,7 +155,7 @@ jobs: submodules: true - name: Build images run: | - docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=core -t local-ai:tests -f Dockerfile . + docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=core --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile . 
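
A note on the cache change above: folding the pinned `GRPC_VERSION` into the `actions/cache` key means that bumping the version produces a cache miss and a fresh build, instead of silently restoring an outdated gRPC tree under the old key. A minimal sketch of the pattern, assuming the same step layout as the workflow (step names are illustrative, the key expression follows the diff):

```yaml
env:
  GRPC_VERSION: v1.58.0  # single source of truth for the pinned release

steps:
  - name: Cache grpc
    id: cache-grpc
    uses: actions/cache@v3
    with:
      path: grpc
      # the version is part of the key, so a bump invalidates the cache
      key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}

  - name: Build grpc
    if: steps.cache-grpc.outputs.cache-hit != 'true'
    run: |
      git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} \
        --depth 1 --shallow-submodules https://github.com/grpc/grpc
```
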
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio - name: Test run: | @@ -176,17 +180,20 @@ jobs: uses: actions/setup-go@v4 with: go-version: ${{ matrix.go-version }} + cache: false # You can test your matrix by printing the current Go version - name: Display Go version run: go version - name: Dependencies run: | - brew install protobuf grpc + brew install protobuf grpc make - name: Test run: | export C_INCLUDE_PATH=/usr/local/include export CPLUS_INCLUDE_PATH=/usr/local/include - BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test + # Used to run the newer GNUMake version from brew that supports --output-sync + export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH" + BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} uses: mxschmitt/action-tmate@v3 diff --git a/Dockerfile b/Dockerfile index 8725e76d..5fb6230c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -90,11 +90,35 @@ RUN if [ ! -e /usr/bin/python ]; then \ ################################### ################################### +FROM ${BASE_IMAGE} as grpc + +ARG MAKEFLAGS +ARG GRPC_VERSION=v1.58.0 + +ENV MAKEFLAGS=${MAKEFLAGS} + +WORKDIR /build + +RUN apt-get update && \ + apt-get install -y g++ cmake git && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc + +RUN cd grpc && \ + mkdir -p cmake/build && \ + cd cmake/build && \ + cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \ + make + +################################### +################################### + FROM requirements-${IMAGE_TYPE} as builder ARG GO_TAGS="stablediffusion tts" ARG GRPC_BACKENDS -ARG BUILD_GRPC=true ARG MAKEFLAGS ENV GRPC_BACKENDS=${GRPC_BACKENDS} @@ -121,12 +145,9 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \ # stablediffusion does not tolerate a newer version of abseil, build it first RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build -RUN if [ "${BUILD_GRPC}" = "true" ]; then \ - git clone --recurse-submodules --jobs 4 -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ - cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \ - -DgRPC_BUILD_TESTS=OFF \ - ../.. && make install \ - ; fi +COPY --from=grpc /build/grpc ./grpc/ + +RUN cd /build/grpc/cmake/build && make install # Rebuild with defaults backends RUN make build @@ -179,7 +200,7 @@ WORKDIR /build COPY . . 
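
The new `grpc` stage above is what makes the `Cache GRPC` workflow step effective: gRPC is compiled once in an isolated stage whose layers can be cached, and later stages copy the prebuilt tree instead of rebuilding it on every image variant. A stripped-down sketch of the structure, assuming the stage and path names from the diff (the package list and cleanup steps are condensed):

```dockerfile
ARG BASE_IMAGE=ubuntu:22.04

# Build gRPC once, in a stage with no other inputs, so its layers cache well
FROM ${BASE_IMAGE} as grpc
ARG GRPC_VERSION=v1.58.0
ARG MAKEFLAGS
ENV MAKEFLAGS=${MAKEFLAGS}
WORKDIR /build
RUN apt-get update && apt-get install -y g++ cmake git
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
    cd grpc && mkdir -p cmake/build && cd cmake/build && \
    cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \
    make

# Consumers copy the prebuilt tree instead of cloning and compiling again
FROM ${BASE_IMAGE} as builder
WORKDIR /build
COPY --from=grpc /build/grpc ./grpc/
RUN cd /build/grpc/cmake/build && make install
```
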
COPY --from=builder /build/sources ./sources/ -COPY --from=builder /build/grpc ./grpc/ +COPY --from=grpc /build/grpc ./grpc/ RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf grpc diff --git a/Makefile b/Makefile index 6c02b80e..03764d86 100644 --- a/Makefile +++ b/Makefile @@ -355,7 +355,7 @@ prepare-e2e: mkdir -p $(TEST_DIR) cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin - docker build --build-arg BUILD_GRPC=true --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests . + docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests . run-e2e-image: ls -liah $(abspath ./tests/e2e-fixtures) @@ -564,6 +564,7 @@ docker: --build-arg BASE_IMAGE=$(BASE_IMAGE) \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="$(GO_TAGS)" \ + --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ --build-arg BUILD_TYPE=$(BUILD_TYPE) \ -t $(DOCKER_IMAGE) . @@ -571,6 +572,7 @@ docker-aio: @echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)" docker build \ --build-arg BASE_IMAGE=$(BASE_IMAGE) \ + --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ -t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio . docker-aio-all: @@ -582,6 +584,7 @@ docker-image-intel: --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="none" \ + --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ --build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) . docker-image-intel-xpu: @@ -589,6 +592,7 @@ docker-image-intel-xpu: --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="none" \ + --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ --build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) . 
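
Taken together, the Makefile and Dockerfile changes thread one parallelism setting end to end: the CI matrix picks a `makeflags` value per runner, `docker build` receives it as the `MAKEFLAGS` build argument, and the `ENV MAKEFLAGS=${MAKEFLAGS}` line inside the image makes every bare `make` inherit it. A hypothetical invocation (the flag values and variable assignment are illustrative, not taken from the Makefile defaults):

```bash
# Choose the parallelism once at the outer call...
make docker DOCKER_MAKEFLAGS="--jobs=5 --output-sync=target"

# ...which expands to roughly:
#   docker build --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" ... -t $(DOCKER_IMAGE) .
# Inside the container, ENV MAKEFLAGS=${MAKEFLAGS} means that a plain
# `make build` behaves like `make --jobs=5 --output-sync=target build`.
```
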
.PHONY: swagger From 92fbdfd06f0bf66f27e132f576fcb58cf9c8b9ef Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 29 Mar 2024 22:48:58 +0100 Subject: [PATCH 0214/2895] feat(swagger): update (#1929) --- swagger/docs.go | 8 ++++---- swagger/swagger.json | 8 ++++---- swagger/swagger.yaml | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/swagger/docs.go b/swagger/docs.go index a922fa2e..0f5c2c47 100644 --- a/swagger/docs.go +++ b/swagger/docs.go @@ -11,12 +11,12 @@ const docTemplate = `{ "description": "{{escape .Description}}", "title": "{{.Title}}", "contact": { - "name": "OpenAI Support", - "url": "https://help.openai.com/" + "name": "LocalAI", + "url": "https://localai.io" }, "license": { "name": "MIT", - "url": "https://github.com/openai/openai-openapi/blob/master/LICENSE" + "url": "https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE" }, "version": "{{.Version}}" }, @@ -789,7 +789,7 @@ var SwaggerInfo = &swag.Spec{ BasePath: "/", Schemes: []string{}, Title: "LocalAI API", - Description: "The OpenAI REST API.", + Description: "The LocalAI Rest API.", InfoInstanceName: "swagger", SwaggerTemplate: docTemplate, LeftDelim: "{{", diff --git a/swagger/swagger.json b/swagger/swagger.json index c9088d93..37dbec47 100644 --- a/swagger/swagger.json +++ b/swagger/swagger.json @@ -1,15 +1,15 @@ { "swagger": "2.0", "info": { - "description": "The OpenAI REST API.", + "description": "The LocalAI Rest API.", "title": "LocalAI API", "contact": { - "name": "OpenAI Support", - "url": "https://help.openai.com/" + "name": "LocalAI", + "url": "https://localai.io" }, "license": { "name": "MIT", - "url": "https://github.com/openai/openai-openapi/blob/master/LICENSE" + "url": "https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE" }, "version": "2.0.0" }, diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index b33e1fab..91180359 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -368,12 +368,12 @@ definitions: host: localhost:8080 info: contact: - name: OpenAI Support - url: https://help.openai.com/ - description: The OpenAI REST API. + name: LocalAI + url: https://localai.io + description: The LocalAI Rest API. license: name: MIT - url: https://github.com/openai/openai-openapi/blob/master/LICENSE + url: https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE title: LocalAI API version: 2.0.0 paths: From bcdc83b46d90dfdaecbcba17a5a8a45702b6cf71 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 29 Mar 2024 23:00:06 +0100 Subject: [PATCH 0215/2895] Update quickstart.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/getting-started/quickstart.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md index 716fe154..c56dced5 100644 --- a/docs/content/docs/getting-started/quickstart.md +++ b/docs/content/docs/getting-started/quickstart.md @@ -112,7 +112,7 @@ docker run -p 8080:8080 --name local-ai -ti -v localai-models:/build/models loca ## Try it out -LocalAI does not ship a webui by default, however you can use 3rd party projects to interact with it (see also [All-in-one Images]({{%relref "docs/integrations" %}}) ). However, you can test out the API endpoints using `curl`. +LocalAI does not ship a webui by default, however you can use 3rd party projects to interact with it (see also [Integrations]({{%relref "docs/integrations" %}}) ). 
However, you can test out the API endpoints using `curl`; you can find a few examples below.
 
 ### Text Generation
 

From 2bba62ca4daf407bd6c7b141f3a0f9b2e30b7599 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Fri, 29 Mar 2024 23:52:01 +0100
Subject: [PATCH 0216/2895] :arrow_up: Update ggerganov/llama.cpp (#1928)

Signed-off-by: GitHub
Co-authored-by: mudler
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 03764d86..1a0d97e1 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
 
 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=5106ef482c65ac60ac14da9a68c7b37bca4c6993
+CPPLLAMA_VERSION?=ba0c7c70ab5b15f1f2be7fb0dfbe0366dda30d6c
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

From eab4a91a9bdcd2e6000234860b578437e459da5b Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 30 Mar 2024 12:04:32 +0100
Subject: [PATCH 0217/2895] fix(aio): correctly detect intel systems (#1931)

Also rename SIZE to PROFILE

---
 aio/entrypoint.sh | 119 +++++++++++++---------
 docs/content/docs/reference/aio-images.md | 2 +-
 2 files changed, 72 insertions(+), 49 deletions(-)

diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh
index d04e5642..a2e040fa 100755
--- a/aio/entrypoint.sh
+++ b/aio/entrypoint.sh
@@ -5,54 +5,77 @@ echo "===> LocalAI All-in-One (AIO) container starting..."
 
 GPU_ACCELERATION=false
 GPU_VENDOR=""
 
+function check_intel() {
+    if lspci | grep -E 'VGA|3D' | grep -iq intel; then
+        echo "Intel GPU detected"
+        if [ -d /opt/intel ]; then
+            GPU_ACCELERATION=true
+            GPU_VENDOR=intel
+        else
+            echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
+        fi
+    fi
+}
+
+function check_nvidia_wsl() {
+    if lspci | grep -E 'VGA|3D' | grep -iq "Microsoft Corporation Device 008e"; then
+        # We make the assumption this WSL2 card is NVIDIA, then check for nvidia-smi
+        # Make sure the container was run with `--gpus all` as the only required parameter
+        echo "NVIDIA GPU detected via WSL2"
+        # nvidia-smi should be installed in the container
+        if nvidia-smi; then
+            GPU_ACCELERATION=true
+            GPU_VENDOR=nvidia
+        else
+            echo "NVIDIA GPU detected via WSL2, but nvidia-smi is not installed. GPU acceleration will not be available."
+        fi
+    fi
+}
+
+function check_amd() {
+    if lspci | grep -E 'VGA|3D' | grep -iq amd; then
+        echo "AMD GPU detected"
+        # Check if ROCm is installed
+        if [ -d /opt/rocm ]; then
+            GPU_ACCELERATION=true
+            GPU_VENDOR=amd
+        else
+            echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
+        fi
+    fi
+}
+
+function check_nvidia() {
+    if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then
+        echo "NVIDIA GPU detected"
+        # nvidia-smi should be installed in the container
+        if nvidia-smi; then
+            GPU_ACCELERATION=true
+            GPU_VENDOR=nvidia
+        else
+            echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available."
+ fi + fi +} + +function check_metal() { + if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then + echo "Apple Metal supported GPU detected" + GPU_ACCELERATION=true + GPU_VENDOR=apple + fi +} + function detect_gpu() { case "$(uname -s)" in Linux) - if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then - echo "NVIDIA GPU detected" - # nvidia-smi should be installed in the container - if nvidia-smi; then - GPU_ACCELERATION=true - GPU_VENDOR=nvidia - else - echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available." - fi - elif lspci | grep -E 'VGA|3D' | grep -iq amd; then - echo "AMD GPU detected" - # Check if ROCm is installed - if [ -d /opt/rocm ]; then - GPU_ACCELERATION=true - GPU_VENDOR=amd - else - echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available." - fi - elif lspci | grep -E 'VGA|3D' | grep -iq intel; then - echo "Intel GPU detected" - if [ -d /opt/intel ]; then - GPU_ACCELERATION=true - GPU_VENDOR=intel - else - echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available." - fi - elif lspci | grep -E 'VGA|3D' | grep -iq "Microsoft Corporation Device 008e"; then - # We make the assumption this WSL2 cars is NVIDIA, then check for nvidia-smi - # Make sure the container was run with `--gpus all` as the only required parameter - echo "NVIDIA GPU detected via WSL2" - # nvidia-smi should be installed in the container - if nvidia-smi; then - GPU_ACCELERATION=true - GPU_VENDOR=nvidia - else - echo "NVIDIA GPU detected via WSL2, but nvidia-smi is not installed. GPU acceleration will not be available." - fi - fi + check_nvidia + check_amd + check_intel + check_nvidia_wsl ;; Darwin) - if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then - echo "Apple Metal supported GPU detected" - GPU_ACCELERATION=true - GPU_VENDOR=apple - fi + check_metal ;; esac } @@ -96,8 +119,8 @@ function check_vars() { exit 1 fi - if [ -z "$SIZE" ]; then - echo "SIZE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple" + if [ -z "$PROFILE" ]; then + echo "PROFILE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple" exit 1 fi } @@ -105,11 +128,11 @@ function check_vars() { detect_gpu detect_gpu_size -SIZE="${SIZE:-$GPU_SIZE}" # default to cpu -export MODELS="${MODELS:-/aio/${SIZE}/embeddings.yaml,/aio/${SIZE}/text-to-speech.yaml,/aio/${SIZE}/image-gen.yaml,/aio/${SIZE}/text-to-text.yaml,/aio/${SIZE}/speech-to-text.yaml,/aio/${SIZE}/vision.yaml}" +PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu +export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}" check_vars -echo "Starting LocalAI with the following models: $MODELS" +echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS" /build/entrypoint.sh "$@" diff --git a/docs/content/docs/reference/aio-images.md b/docs/content/docs/reference/aio-images.md index c2cb57ba..40f01f06 100644 --- a/docs/content/docs/reference/aio-images.md +++ b/docs/content/docs/reference/aio-images.md @@ -46,7 +46,7 @@ The AIO Images are inheriting the same environment variables as the base images | Variable | Default | Description | | ---------------------| ------- | ----------- | -| `SIZE` | Auto-detected | The size of the model to use. 
Available: `cpu`, `gpu-8g` | +| `PROFILE` | Auto-detected | The size of the model to use. Available: `cpu`, `gpu-8g` | | `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/getting-started/run-other-models" %}})) | From 61e5e6bc36adb51b3ba29d27f5208222a8d69db3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 30 Mar 2024 12:04:41 +0100 Subject: [PATCH 0218/2895] fix(swagger): do not specify a host (#1930) In this way the requests are redirected to the host used by the client to perform the request. --- core/http/api.go | 1 - swagger/docs.go | 2 +- swagger/swagger.json | 1 - swagger/swagger.yaml | 1 - 4 files changed, 1 insertion(+), 4 deletions(-) diff --git a/core/http/api.go b/core/http/api.go index ff413b0a..af38512a 100644 --- a/core/http/api.go +++ b/core/http/api.go @@ -51,7 +51,6 @@ func readAuthHeader(c *fiber.Ctx) string { // @contact.url https://localai.io // @license.name MIT // @license.url https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE -// @host localhost:8080 // @BasePath / // @securityDefinitions.apikey BearerAuth // @in header diff --git a/swagger/docs.go b/swagger/docs.go index 0f5c2c47..e0199673 100644 --- a/swagger/docs.go +++ b/swagger/docs.go @@ -785,7 +785,7 @@ const docTemplate = `{ // SwaggerInfo holds exported Swagger Info so clients can modify it var SwaggerInfo = &swag.Spec{ Version: "2.0.0", - Host: "localhost:8080", + Host: "", BasePath: "/", Schemes: []string{}, Title: "LocalAI API", diff --git a/swagger/swagger.json b/swagger/swagger.json index 37dbec47..4d7102c4 100644 --- a/swagger/swagger.json +++ b/swagger/swagger.json @@ -13,7 +13,6 @@ }, "version": "2.0.0" }, - "host": "localhost:8080", "basePath": "/", "paths": { "/v1/assistants": { diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index 91180359..86caff8a 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -365,7 +365,6 @@ definitions: type: type: string type: object -host: localhost:8080 info: contact: name: LocalAI From 957f428fd5adacb12bc094ddfdc5f3c784dadbed Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 30 Mar 2024 19:02:07 +0100 Subject: [PATCH 0219/2895] fix(tools): correctly render tools response in templates (#1932) * fix(tools): allow to correctly display both Functions and Tools * models(hermes-2-pro): correctly display function results --- aio/gpu-8g/text-to-text.yaml | 21 ++++----- aio/intel/text-to-text.yaml | 21 ++++----- core/http/endpoints/openai/chat.go | 55 +++++++++++++++-------- embedded/models/hermes-2-pro-mistral.yaml | 21 ++++----- 4 files changed, 64 insertions(+), 54 deletions(-) diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml index c6f26c07..1a67169b 100644 --- a/aio/gpu-8g/text-to-text.yaml +++ b/aio/gpu-8g/text-to-text.yaml @@ -3,30 +3,27 @@ mmap: true parameters: model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf -roles: - assistant_function_call: assistant - function: tool template: chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}} - {{ if eq .RoleName "assistant_function_call" }}{{end}} - {{ if eq .RoleName "function" }}{{end}} + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} + {{ if .FunctionCall }}{{end}} + {{ 
if eq .RoleName "tool" }}{{end}} {{if .Content}}{{.Content}}{{end}} {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}} - {{ if eq .RoleName "assistant_function_call" }}{{end}} - {{ if eq .RoleName "function" }}{{end}} + {{ if .FunctionCall }}{{end}} + {{ if eq .RoleName "tool" }}{{end}} <|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | <|im_start|>system - You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: + You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: {{range .Functions}} {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} {{end}} - - Use the following pydantic model json schema for each tool call you will make: - {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} + + Use the following pydantic model json schema for each tool call you will make: + {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} For each function call return a json object with function name and arguments within XML tags as follows: {'arguments': , 'name': } diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml index ef36b562..0577d19b 100644 --- a/aio/intel/text-to-text.yaml +++ b/aio/intel/text-to-text.yaml @@ -4,30 +4,27 @@ f16: false parameters: model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf -roles: - assistant_function_call: assistant - function: tool template: chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}} - {{ if eq .RoleName "assistant_function_call" }}{{end}} - {{ if eq .RoleName "function" }}{{end}} + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} + {{ if .FunctionCall }}{{end}} + {{ if eq .RoleName "tool" }}{{end}} {{if .Content}}{{.Content}}{{end}} {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}} - {{ if eq .RoleName "assistant_function_call" }}{{end}} - {{ if eq .RoleName "function" }}{{end}} + {{ if .FunctionCall }}{{end}} + {{ if eq .RoleName "tool" }}{{end}} <|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | <|im_start|>system - You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: + You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. 
Don't make assumptions about what values to plug into functions. Here are the available tools: {{range .Functions}} {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} {{end}} - - Use the following pydantic model json schema for each tool call you will make: - {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} + + Use the following pydantic model json schema for each tool call you will make: + {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} For each function call return a json object with function name and arguments within XML tags as follows: {'arguments': , 'name': } diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index c2e22962..f5f03eb4 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -236,7 +236,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup // if function call, we might want to customize the role so we can display better that the "assistant called a json action" // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request - if i.FunctionCall != nil && i.Role == "assistant" { + if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" { roleFn := "assistant_function_call" r := config.Roles[roleFn] if r != "" { @@ -246,6 +246,11 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup r := config.Roles[role] contentExists := i.Content != nil && i.StringContent != "" + fcall := i.FunctionCall + if len(i.ToolCalls) > 0 { + fcall = i.ToolCalls + } + // First attempt to populate content via a chat message specific template if config.TemplateConfig.ChatMessage != "" { chatMessageData := model.ChatMessageTemplateData{ @@ -253,7 +258,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup Role: r, RoleName: role, Content: i.StringContent, - FunctionCall: i.FunctionCall, + FunctionCall: fcall, FunctionName: i.Name, LastMessage: messageIndex == (len(input.Messages) - 1), Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)), @@ -271,35 +276,49 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup content = templatedChatMessage } } + + marshalAnyRole := func(f any) { + j, err := json.Marshal(f) + if err == nil { + if contentExists { + content += "\n" + fmt.Sprint(r, " ", string(j)) + } else { + content = fmt.Sprint(r, " ", string(j)) + } + } + } + marshalAny := func(f any) { + j, err := json.Marshal(f) + if err == nil { + if contentExists { + content += "\n" + string(j) + } else { + content = string(j) + } + } + } // If this model doesn't have such a template, or if that template fails to return a value, template at the message level. 
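
The `marshalAnyRole` and `marshalAny` closures above replace four nearly identical marshal-and-append blocks in the fallback path that follows. A self-contained sketch of the pattern they implement, with simplified types (only the closure bodies are taken from the code above; the `call` struct and `main` scaffolding are illustrative):

```go
package main

import (
	"encoding/json"
	"fmt"
)

type call struct {
	Name      string         `json:"name"`
	Arguments map[string]any `json:"arguments"`
}

func main() {
	content := ""
	contentExists := false
	r := "assistant"

	// marshalAnyRole prefixes the serialized payload with the role name;
	// marshalAny appends it bare. Both silently skip on marshal errors.
	marshalAnyRole := func(f any) {
		if j, err := json.Marshal(f); err == nil {
			if contentExists {
				content += "\n" + fmt.Sprint(r, " ", string(j))
			} else {
				content = fmt.Sprint(r, " ", string(j))
			}
		}
	}
	marshalAny := func(f any) {
		if j, err := json.Marshal(f); err == nil {
			if contentExists {
				content += "\n" + string(j)
			} else {
				content = string(j)
			}
		}
	}

	marshalAnyRole(call{Name: "get_current_weather", Arguments: map[string]any{"location": "Boston"}})
	contentExists = true
	marshalAny(call{Name: "get_current_weather", Arguments: map[string]any{"location": "Boston"}})
	fmt.Println(content)
}
```
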
if content == "" { if r != "" { if contentExists { content = fmt.Sprint(r, i.StringContent) } + if i.FunctionCall != nil { - j, err := json.Marshal(i.FunctionCall) - if err == nil { - if contentExists { - content += "\n" + fmt.Sprint(r, " ", string(j)) - } else { - content = fmt.Sprint(r, " ", string(j)) - } - } + marshalAnyRole(i.FunctionCall) + } + if i.ToolCalls != nil { + marshalAnyRole(i.ToolCalls) } } else { if contentExists { content = fmt.Sprint(i.StringContent) } if i.FunctionCall != nil { - j, err := json.Marshal(i.FunctionCall) - if err == nil { - if contentExists { - content += "\n" + string(j) - } else { - content = string(j) - } - } + marshalAny(i.FunctionCall) + } + if i.ToolCalls != nil { + marshalAny(i.ToolCalls) } } // Special Handling: System. We care if it was printed at all, not the r branch, so check seperately diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml index 84510d2a..108216f5 100644 --- a/embedded/models/hermes-2-pro-mistral.yaml +++ b/embedded/models/hermes-2-pro-mistral.yaml @@ -3,30 +3,27 @@ mmap: true parameters: model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf -roles: - assistant_function_call: assistant - function: tool template: chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}} - {{ if eq .RoleName "assistant_function_call" }}{{end}} - {{ if eq .RoleName "function" }}{{end}} + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} + {{ if .FunctionCall }}{{end}} + {{ if eq .RoleName "tool" }}{{end}} {{if .Content}}{{.Content}}{{end}} {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}} - {{ if eq .RoleName "assistant_function_call" }}{{end}} - {{ if eq .RoleName "function" }}{{end}} + {{ if .FunctionCall }}{{end}} + {{ if eq .RoleName "tool" }}{{end}} <|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | <|im_start|>system - You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: + You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: {{range .Functions}} {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} {{end}} - - Use the following pydantic model json schema for each tool call you will make: - {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} + + Use the following pydantic model json schema for each tool call you will make: + {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} For each function call return a json object with function name and arguments within XML tags as follows: {'arguments': , 'name': } From 831efa8893b6fe9b983ee1c28b74c1777da8b0cb Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 31 Mar 2024 00:27:16 +0100 Subject: [PATCH 0220/2895] :arrow_up: Update ggerganov/whisper.cpp (#1933) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1a0d97e1..4561ea15 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=1e8f28c42a1472ae7c49d0502ea06e2f5bc29a69 +WHISPER_CPP_VERSION?=ac283dbce7d42735e3ed985329037bf23fe180aa # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From 784657a652b152bea211d42a0f7b43c29ab4cad3 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 31 Mar 2024 00:27:38 +0100 Subject: [PATCH 0221/2895] :arrow_up: Update ggerganov/llama.cpp (#1934) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4561ea15..474171bb 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=ba0c7c70ab5b15f1f2be7fb0dfbe0366dda30d6c +CPPLLAMA_VERSION?=37e7854c104301c5b5323ccc40e07699f3a62c3e # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 35290e146b8b575cd691c844dd611ead3c111c0b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 31 Mar 2024 13:04:09 +0200 Subject: [PATCH 0222/2895] fix(grammar): respect JSONmode and grammar from user input (#1935) * fix(grammar): Fix JSON mode and custom grammar * tests(aio): add jsonmode test * tests(aio): add functioncall test * fix(aio): use hermes-2-pro-mistral as llm for CPU profile * add phi-2-orange --- aio/cpu/text-to-text.yaml | 31 ++++++++-- core/http/endpoints/openai/chat.go | 2 + core/http/endpoints/openai/completion.go | 2 + embedded/models/phi-2-orange.yaml | 30 +++++++++ tests/e2e-aio/e2e_test.go | 79 +++++++++++++++++++++++- 5 files changed, 139 insertions(+), 5 deletions(-) create mode 100644 embedded/models/phi-2-orange.yaml diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml index 4fd88500..aeb3c842 100644 --- a/aio/cpu/text-to-text.yaml +++ b/aio/cpu/text-to-text.yaml @@ -1,25 +1,48 @@ name: gpt-4 mmap: true parameters: - model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf + model: 
huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q2_K.gguf template: chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} + {{ if .FunctionCall }}{{end}} + {{ if eq .RoleName "tool" }}{{end}} {{if .Content}}{{.Content}}{{end}} + {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}} + {{ if .FunctionCall }}{{end}} + {{ if eq .RoleName "tool" }}{{end}} <|im_end|> + # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling + function: | + <|im_start|>system + You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: + + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + + Use the following pydantic model json schema for each tool call you will make: + {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} + For each function call return a json object with function name and arguments within XML tags as follows: + + {'arguments': , 'name': } + <|im_end|> + {{.Input}} + <|im_start|>assistant + chat: | {{.Input}} <|im_start|>assistant completion: | {{.Input}} -context_size: 2048 +context_size: 4096 f16: true stopwords: - <|im_end|> - usage: | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "phi-2-chat", + "model": "gpt-4", "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] }' diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index f5f03eb4..837b6e12 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -185,6 +185,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup input.Grammar = grammar.JSONBNF } + config.Grammar = input.Grammar + // process functions if we have any defined or if we have a function call string if len(input.Functions) > 0 && config.ShouldUseFunctions() { log.Debug().Msgf("Response needs to process functions") diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index a67f0993..69923475 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -73,6 +73,8 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a input.Grammar = grammar.JSONBNF } + config.Grammar = input.Grammar + log.Debug().Msgf("Parameter Config: %+v", config) if input.Stream { diff --git a/embedded/models/phi-2-orange.yaml b/embedded/models/phi-2-orange.yaml new file mode 100644 index 00000000..9207d283 --- /dev/null +++ b/embedded/models/phi-2-orange.yaml @@ -0,0 +1,30 @@ +name: phi-2-chat +mmap: true +parameters: + model: huggingface://l3utterfly/phi-2-orange-GGUF/phi-2-orange.Q6_K.gguf + +template: + chat_message: | + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq 
.RoleName "user"}}user{{end}} + {{if .Content}}{{.Content}}{{end}} + <|im_end|> + chat: | + {{.Input}} + <|im_start|>assistant + completion: | + {{.Input}} +context_size: 4096 +f16: true +stopwords: +- <|im_end|> +- + +description: | + This model is a chatbot that can be used for general conversation. + [Model card](https://huggingface.co/TheBloke/phi-2-orange-GGUF) + +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "phi-2-chat", + "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] + }' diff --git a/tests/e2e-aio/e2e_test.go b/tests/e2e-aio/e2e_test.go index c52d789e..8fcd1280 100644 --- a/tests/e2e-aio/e2e_test.go +++ b/tests/e2e-aio/e2e_test.go @@ -2,6 +2,7 @@ package e2e_test import ( "context" + "encoding/json" "fmt" "io" "net/http" @@ -9,8 +10,8 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - "github.com/sashabaranov/go-openai" + "github.com/sashabaranov/go-openai/jsonschema" ) var _ = Describe("E2E test", func() { @@ -40,6 +41,82 @@ var _ = Describe("E2E test", func() { Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(resp.Choices[0].Message.Content)) }) }) + + Context("function calls", func() { + It("correctly invoke", func() { + params := jsonschema.Definition{ + Type: jsonschema.Object, + Properties: map[string]jsonschema.Definition{ + "location": { + Type: jsonschema.String, + Description: "The city and state, e.g. San Francisco, CA", + }, + "unit": { + Type: jsonschema.String, + Enum: []string{"celsius", "fahrenheit"}, + }, + }, + Required: []string{"location"}, + } + + f := openai.FunctionDefinition{ + Name: "get_current_weather", + Description: "Get the current weather in a given location", + Parameters: params, + } + t := openai.Tool{ + Type: openai.ToolTypeFunction, + Function: &f, + } + + dialogue := []openai.ChatCompletionMessage{ + {Role: openai.ChatMessageRoleUser, Content: "What is the weather in Boston today?"}, + } + resp, err := client.CreateChatCompletion(context.TODO(), + openai.ChatCompletionRequest{ + Model: openai.GPT4, + Messages: dialogue, + Tools: []openai.Tool{t}, + }, + ) + Expect(err).ToNot(HaveOccurred()) + Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp)) + + msg := resp.Choices[0].Message + Expect(len(msg.ToolCalls)).To(Equal(1), fmt.Sprint(msg.ToolCalls)) + Expect(msg.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), fmt.Sprint(msg.ToolCalls[0].Function.Name)) + Expect(msg.ToolCalls[0].Function.Arguments).To(ContainSubstring("Boston"), fmt.Sprint(msg.ToolCalls[0].Function.Arguments)) + }) + }) + Context("json", func() { + It("correctly", func() { + model := "gpt-4" + + req := openai.ChatCompletionRequest{ + ResponseFormat: &openai.ChatCompletionResponseFormat{Type: openai.ChatCompletionResponseFormatTypeJSONObject}, + Model: model, + Messages: []openai.ChatCompletionMessage{ + { + + Role: "user", + Content: "An animal with 'name', 'gender' and 'legs' fields", + }, + }, + } + + resp, err := client.CreateChatCompletion(context.TODO(), req) + Expect(err).ToNot(HaveOccurred()) + Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp)) + + var i map[string]interface{} + err = json.Unmarshal([]byte(resp.Choices[0].Message.Content), &i) + Expect(err).ToNot(HaveOccurred()) + Expect(i).To(HaveKey("name")) + Expect(i).To(HaveKey("gender")) + Expect(i).To(HaveKey("legs")) + }) + }) + Context("images", func() { It("correctly", func() { resp, err := 
client.CreateImage(context.TODO(), From 3c778b538aee121543ddaeb334cbb7f0e4790d98 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 31 Mar 2024 13:06:41 +0200 Subject: [PATCH 0223/2895] Update phi-2-orange.yaml Signed-off-by: Ettore Di Giacinto --- embedded/models/phi-2-orange.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/embedded/models/phi-2-orange.yaml b/embedded/models/phi-2-orange.yaml index 9207d283..838909c9 100644 --- a/embedded/models/phi-2-orange.yaml +++ b/embedded/models/phi-2-orange.yaml @@ -1,4 +1,4 @@ -name: phi-2-chat +name: phi-2-orange mmap: true parameters: model: huggingface://l3utterfly/phi-2-orange-GGUF/phi-2-orange.Q6_K.gguf @@ -25,6 +25,6 @@ description: | usage: | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "phi-2-chat", + "model": "phi-2-orange", "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] }' From 66f90f8dc1cb49d8926cfd5377b9409b4e8380e9 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 1 Apr 2024 08:59:23 +0200 Subject: [PATCH 0224/2895] :arrow_up: Update ggerganov/llama.cpp (#1937) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 474171bb..2f80a121 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=37e7854c104301c5b5323ccc40e07699f3a62c3e +CPPLLAMA_VERSION?=c50a82ce0f71558cbb8e555146ba124251504b38 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From ebb1fcedea2f41292d0ce3e294f5df2375c69a0a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 1 Apr 2024 11:48:35 +0200 Subject: [PATCH 0225/2895] fix(hermes-2-pro-mistral): add stopword for toolcall (#1939) Signed-off-by: Ettore Di Giacinto --- aio/cpu/text-to-text.yaml | 1 + aio/gpu-8g/text-to-text.yaml | 1 + aio/intel/text-to-text.yaml | 1 + embedded/models/hermes-2-pro-mistral.yaml | 1 + 4 files changed, 4 insertions(+) diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml index aeb3c842..d30f403e 100644 --- a/aio/cpu/text-to-text.yaml +++ b/aio/cpu/text-to-text.yaml @@ -41,6 +41,7 @@ f16: true stopwords: - <|im_end|> - +- "\n" usage: | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ "model": "gpt-4", diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml index 1a67169b..1645a257 100644 --- a/aio/gpu-8g/text-to-text.yaml +++ b/aio/gpu-8g/text-to-text.yaml @@ -41,6 +41,7 @@ f16: true stopwords: - <|im_end|> - +- "\n" usage: | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ "model": "gpt-4", diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml index 0577d19b..3f3d2c39 100644 --- a/aio/intel/text-to-text.yaml +++ b/aio/intel/text-to-text.yaml @@ -40,6 +40,7 @@ template: context_size: 4096 stopwords: - <|im_end|> +- "\n" - usage: | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml index 108216f5..3792be78 100644 --- a/embedded/models/hermes-2-pro-mistral.yaml +++ b/embedded/models/hermes-2-pro-mistral.yaml @@ -41,6 +41,7 @@ f16: true stopwords: - <|im_end|> - +- "\n" usage: | curl 
http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
    "model": "hermes-2-pro-mistral",

From e8f02c083f03b04ffc58eb8ba1f093c9227be0a5 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 1 Apr 2024 19:39:54 +0200
Subject: [PATCH 0226/2895] fix(functions): respect when selected from string
 (#1940)

* fix(functions): respect when selected from string

* fix(toolschoice): decode both string and objects
---
 core/config/backend_config.go | 7 ++++++-
 core/http/endpoints/openai/request.go | 9 ++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index 32e10a17..db9c6665 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -185,7 +185,12 @@ func (c *BackendConfig) ShouldCallSpecificFunction() bool {
 }

 func (c *BackendConfig) FunctionToCall() string {
-	return c.functionCallNameString
+	if c.functionCallNameString != "" &&
+		c.functionCallNameString != "none" && c.functionCallNameString != "auto" {
+		return c.functionCallNameString
+	}
+
+	return c.functionCallString
 }

 func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go
index 1f845c6f..c9981204 100644
--- a/core/http/endpoints/openai/request.go
+++ b/core/http/endpoints/openai/request.go
@@ -146,7 +146,14 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque

 	if input.ToolsChoice != nil {
 		var toolChoice grammar.Tool
-		json.Unmarshal([]byte(input.ToolsChoice.(string)), &toolChoice)
+
+		switch content := input.ToolsChoice.(type) {
+		case string:
+			_ = json.Unmarshal([]byte(content), &toolChoice)
+		case map[string]interface{}:
+			dat, _ := json.Marshal(content)
+			_ = json.Unmarshal(dat, &toolChoice)
+		}
 		input.FunctionCall = map[string]interface{}{
 			"name": toolChoice.Function.Name,
 		}

From 86bc5f1350c5841a2a7d029f5f53faf52d025fd1 Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Tue, 2 Apr 2024 02:15:44 -0500
Subject: [PATCH 0227/2895] fix: use exec in entrypoint scripts to fix signal
 handling (#1943)

---
 aio/entrypoint.sh | 2 +-
 entrypoint.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh
index a2e040fa..5fd8d9c2 100755
--- a/aio/entrypoint.sh
+++ b/aio/entrypoint.sh
@@ -135,4 +135,4 @@ check_vars

 echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS"

-/build/entrypoint.sh "$@"
+exec /build/entrypoint.sh "$@"
diff --git a/entrypoint.sh b/entrypoint.sh
index 05f67128..fb8417df 100755
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -47,4 +47,4 @@ else
 	echo "@@@@@"
 fi

-./local-ai "$@"
+exec ./local-ai "$@"

From 4d4d76114dc7c58f8e9504cb018138c311007824 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 2 Apr 2024 09:16:04 +0200
Subject: [PATCH 0228/2895] :arrow_up: Update ggerganov/llama.cpp (#1941)

Signed-off-by: GitHub
Co-authored-by: mudler
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 2f80a121..a98eac67 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai

 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be

-CPPLLAMA_VERSION?=c50a82ce0f71558cbb8e555146ba124251504b38
+CPPLLAMA_VERSION?=f87f7b898651339fe173ddf016ca826163e899d8

 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
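To make the tool-choice handling in [PATCH 0226] above concrete: an OpenAI-style client may send `tool_choice` either as a plain string (for example "none", "auto", or a JSON-encoded object) or as an already-decoded object naming one function, which is why the type switch needs both branches. A minimal, self-contained Go sketch of the same idea, using simplified types rather than LocalAI's actual schema:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// toolChoice mirrors only the subset of the payload the endpoint cares about.
type toolChoice struct {
	Function struct {
		Name string `json:"name"`
	} `json:"function"`
}

// decode accepts the value as it arrives from a generic JSON body: either a
// string holding JSON, or a map already decoded into interface{} values.
func decode(raw any) toolChoice {
	var tc toolChoice
	switch v := raw.(type) {
	case string:
		// bare strings like "auto" or "none" simply fail to unmarshal,
		// leaving the zero value, mirroring the patch's ignored error
		_ = json.Unmarshal([]byte(v), &tc)
	case map[string]any:
		// round-trip the map through JSON into the typed struct
		b, _ := json.Marshal(v)
		_ = json.Unmarshal(b, &tc)
	}
	return tc
}

func main() {
	fmt.Println(decode(`{"function":{"name":"get_current_weather"}}`).Function.Name)
	fmt.Println(decode(map[string]any{
		"function": map[string]any{"name": "get_current_weather"},
	}).Function.Name)
}
```

From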
84e0dc3246fabd658ef8ba8ca983e8f5f36f3706 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 2 Apr 2024 15:38:00 +0200 Subject: [PATCH 0229/2895] fix(hermes-2-pro-mistral): correct stopwords (#1947) Signed-off-by: Ettore Di Giacinto --- aio/cpu/text-to-text.yaml | 1 + aio/gpu-8g/text-to-text.yaml | 1 + aio/intel/text-to-text.yaml | 1 + embedded/models/hermes-2-pro-mistral.yaml | 1 + 4 files changed, 4 insertions(+) diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml index d30f403e..8a20109d 100644 --- a/aio/cpu/text-to-text.yaml +++ b/aio/cpu/text-to-text.yaml @@ -42,6 +42,7 @@ stopwords: - <|im_end|> - - "\n
" +- "\n\n\n" usage: | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ "model": "gpt-4", diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml index 1645a257..9502cdfe 100644 --- a/aio/gpu-8g/text-to-text.yaml +++ b/aio/gpu-8g/text-to-text.yaml @@ -42,6 +42,7 @@ stopwords: - <|im_end|> - - "\n
" +- "\n\n\n" usage: | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ "model": "gpt-4", diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml index 3f3d2c39..d2316745 100644 --- a/aio/intel/text-to-text.yaml +++ b/aio/intel/text-to-text.yaml @@ -42,6 +42,7 @@ stopwords: - <|im_end|> - "\n
" - +- "\n\n\n" usage: | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ "model": "gpt-4", diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml index 3792be78..eb75b97c 100644 --- a/embedded/models/hermes-2-pro-mistral.yaml +++ b/embedded/models/hermes-2-pro-mistral.yaml @@ -42,6 +42,7 @@ stopwords: - <|im_end|> - - "\n" +- "\n\n\n" usage: | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ "model": "hermes-2-pro-mistral", From 9bc209ba731a6b5fafc5f6c646de563757b70ea3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 2 Apr 2024 19:25:32 +0200 Subject: [PATCH 0230/2895] fix(welcome): stable model list (#1949) --- core/config/backend_config.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/config/backend_config.go b/core/config/backend_config.go index db9c6665..9b227578 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -7,6 +7,7 @@ import ( "math/rand" "os" "path/filepath" + "sort" "strings" "sync" @@ -455,6 +456,11 @@ func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig { for _, v := range cl.configs { res = append(res, v) } + + sort.SliceStable(res, func(i, j int) bool { + return res[i].Name < res[j].Name + }) + return res } From 89560ef87f5146a53cf3d0df704ee3eede88dd3f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 2 Apr 2024 19:25:46 +0200 Subject: [PATCH 0231/2895] fix(ci): manually tag latest images (#1948) fix(ci): manually tag images Signed-off-by: Ettore Di Giacinto --- .github/workflows/image.yml | 8 ++++++++ .github/workflows/image_build.yml | 21 +++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 79a38fc5..0c708b1d 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -28,6 +28,7 @@ jobs: base-image: ${{ matrix.base-image }} aio: ${{ matrix.aio }} makeflags: ${{ matrix.makeflags }} + latest-image: ${{ matrix.latest-image }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -92,6 +93,7 @@ jobs: runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" aio: "-aio-gpu-nvidia-cuda-11" + latest-image: 'latest-aio-gpu-nvidia-cuda-11' makeflags: "--jobs=3 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" @@ -104,6 +106,7 @@ jobs: runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" aio: "-aio-gpu-nvidia-cuda-12" + latest-image: 'latest-aio-gpu-nvidia-cuda-12' makeflags: "--jobs=3 --output-sync=target" - build-type: '' #platforms: 'linux/amd64,linux/arm64' @@ -123,6 +126,7 @@ jobs: image-type: 'extras' aio: "-aio-gpu-hipblas" base-image: "rocm/dev-ubuntu-22.04:6.0-complete" + latest-image: 'latest-aio-gpu-hipblas' runs-on: 'arc-runner-set' makeflags: "--jobs=3 --output-sync=target" - build-type: 'hipblas' @@ -143,6 +147,7 @@ jobs: image-type: 'extras' runs-on: 'arc-runner-set' aio: "-aio-gpu-intel-f16" + latest-image: 'latest-aio-gpu-intel-f16' makeflags: "--jobs=3 --output-sync=target" - build-type: 'sycl_f32' platforms: 'linux/amd64' @@ -153,6 +158,7 @@ jobs: image-type: 'extras' runs-on: 'arc-runner-set' aio: "-aio-gpu-intel-f32" + latest-image: 'latest-aio-gpu-intel-f32' makeflags: "--jobs=3 --output-sync=target" # Core images - build-type: 'sycl_f16' @@ -225,6 +231,7 @@ jobs: aio: ${{ matrix.aio }} base-image: ${{ matrix.base-image }} makeflags: ${{ matrix.makeflags }} + latest-image: ${{ 
matrix.latest-image }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -242,6 +249,7 @@ jobs: base-image: "ubuntu:22.04" runs-on: 'ubuntu-latest' aio: "-aio-cpu" + latest-image: 'latest-aio-cpu' makeflags: "--jobs=5 --output-sync=target" - build-type: 'cublas' cuda-major-version: "11" diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index d07df441..affa03bf 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -29,6 +29,10 @@ on: description: 'Tag latest' default: '' type: string + latest-image: + description: 'Tag latest' + default: '' + type: string tag-suffix: description: 'Tag suffix' default: '' @@ -266,6 +270,23 @@ jobs: tags: ${{ steps.meta_aio_dockerhub.outputs.tags }} labels: ${{ steps.meta_aio_dockerhub.outputs.labels }} + - name: Latest tag + # run this on branches, when it is a tag and there is a latest-image defined + if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag' + run: | + docker pull localai/localai:${{ steps.meta.outputs.version }} + docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }} + docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} + docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }} + - name: Latest AIO tag + # run this on branches, when it is a tag and there is a latest-image defined + if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag' + run: | + docker pull localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} + docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image }} + docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} + docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }} + - name: job summary run: | echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY From 93cfec3c326f98d9126dc0c835723a7e2ec5148d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 3 Apr 2024 11:30:12 +0200 Subject: [PATCH 0232/2895] ci: correctly tag latest and aio images --- .github/workflows/image.yml | 20 ++++++++++++++------ .github/workflows/image_build.yml | 10 +++++++--- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 0c708b1d..d2607579 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -29,6 +29,7 @@ jobs: aio: ${{ matrix.aio }} makeflags: ${{ matrix.makeflags }} latest-image: ${{ matrix.latest-image }} + latest-image-aio: ${{ matrix.latest-image-aio }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -93,7 +94,8 @@ jobs: runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" aio: "-aio-gpu-nvidia-cuda-11" - latest-image: 'latest-aio-gpu-nvidia-cuda-11' + latest-image: 'latest-gpu-nvidia-cuda-11' + latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11' makeflags: "--jobs=3 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" @@ -106,7 +108,8 @@ jobs: runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" aio: "-aio-gpu-nvidia-cuda-12" - latest-image: 'latest-aio-gpu-nvidia-cuda-12' + latest-image: 'latest-gpu-nvidia-cuda-12' + 
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12' makeflags: "--jobs=3 --output-sync=target" - build-type: '' #platforms: 'linux/amd64,linux/arm64' @@ -126,7 +129,8 @@ jobs: image-type: 'extras' aio: "-aio-gpu-hipblas" base-image: "rocm/dev-ubuntu-22.04:6.0-complete" - latest-image: 'latest-aio-gpu-hipblas' + latest-image: 'latest-gpu-hipblas' + latest-image-aio: 'latest-aio-gpu-hipblas' runs-on: 'arc-runner-set' makeflags: "--jobs=3 --output-sync=target" - build-type: 'hipblas' @@ -147,7 +151,8 @@ jobs: image-type: 'extras' runs-on: 'arc-runner-set' aio: "-aio-gpu-intel-f16" - latest-image: 'latest-aio-gpu-intel-f16' + latest-image: 'latest-gpu-intel-f16' + latest-image-aio: 'latest-aio-gpu-intel-f16' makeflags: "--jobs=3 --output-sync=target" - build-type: 'sycl_f32' platforms: 'linux/amd64' @@ -158,7 +163,8 @@ jobs: image-type: 'extras' runs-on: 'arc-runner-set' aio: "-aio-gpu-intel-f32" - latest-image: 'latest-aio-gpu-intel-f32' + latest-image: 'latest-gpu-intel-f32' + latest-image-aio: 'latest-aio-gpu-intel-f32' makeflags: "--jobs=3 --output-sync=target" # Core images - build-type: 'sycl_f16' @@ -232,6 +238,7 @@ jobs: base-image: ${{ matrix.base-image }} makeflags: ${{ matrix.makeflags }} latest-image: ${{ matrix.latest-image }} + latest-image-aio: ${{ matrix.latest-image-aio }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -249,7 +256,8 @@ jobs: base-image: "ubuntu:22.04" runs-on: 'ubuntu-latest' aio: "-aio-cpu" - latest-image: 'latest-aio-cpu' + latest-image: 'latest-cpu' + latest-image-aio: 'latest-aio-cpu' makeflags: "--jobs=5 --output-sync=target" - build-type: 'cublas' cuda-major-version: "11" diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index affa03bf..cba78933 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -33,6 +33,10 @@ on: description: 'Tag latest' default: '' type: string + latest-image-aio: + description: 'Tag latest' + default: '' + type: string tag-suffix: description: 'Tag suffix' default: '' @@ -280,12 +284,12 @@ jobs: docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }} - name: Latest AIO tag # run this on branches, when it is a tag and there is a latest-image defined - if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag' + if: github.event_name != 'pull_request' && inputs.latest-image-aio != '' && github.ref_type == 'tag' run: | docker pull localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} - docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image }} + docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }} docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} - docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }} + docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }} - name: job summary run: | From ff77d3bc22754ebac0aa624ed2f32ab355e8a310 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 3 Apr 2024 22:25:47 +0200 Subject: [PATCH 0233/2895] fix(seed): generate random seed per-request if -1 is set (#1952) * fix(seed): generate random seed per-request if -1 is set Also update ci with 
new workflows and allow the aio tests to run with an api key

Signed-off-by: Ettore Di Giacinto

* docs(openvino): Add OpenVINO example

Signed-off-by: Ettore Di Giacinto

---------

Signed-off-by: Ettore Di Giacinto
---
 .github/labeler.yml | 19 +++++++++++
 .github/workflows/labeler.yml | 12 +++++++
 .github/workflows/secscan.yaml | 27 ++++++++++++++++
 core/backend/options.go | 15 +++++++--
 core/config/backend_config.go | 7 ++--
 docs/content/docs/features/text-generation.md | 32 +++++++++++++++++++
 tests/e2e-aio/e2e_suite_test.go | 5 +--
 7 files changed, 110 insertions(+), 7 deletions(-)
 create mode 100644 .github/labeler.yml
 create mode 100644 .github/workflows/labeler.yml
 create mode 100644 .github/workflows/secscan.yaml

diff --git a/.github/labeler.yml b/.github/labeler.yml
new file mode 100644
index 00000000..64a88f43
--- /dev/null
+++ b/.github/labeler.yml
@@ -0,0 +1,19 @@
+enhancements:
+  - head-branch: ['^feature', 'feature']
+
+kind/documentation:
+- any:
+  - changed-files:
+    - any-glob-to-any-file: 'docs/*'
+  - changed-files:
+    - any-glob-to-any-file: '*.md'
+
+examples:
+- any:
+  - changed-files:
+    - any-glob-to-any-file: 'examples/*'
+
+ci:
+- any:
+  - changed-files:
+    - any-glob-to-any-file: '.github/*'
\ No newline at end of file
diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
new file mode 100644
index 00000000..e3ecf923
--- /dev/null
+++ b/.github/workflows/labeler.yml
@@ -0,0 +1,12 @@
+name: "Pull Request Labeler"
+on:
+- pull_request_target
+
+jobs:
+  labeler:
+    permissions:
+      contents: read
+      pull-requests: write
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/labeler@v5
\ No newline at end of file
diff --git a/.github/workflows/secscan.yaml b/.github/workflows/secscan.yaml
new file mode 100644
index 00000000..a5221b40
--- /dev/null
+++ b/.github/workflows/secscan.yaml
@@ -0,0 +1,27 @@
+name: "Security Scan"
+
+# Run workflow each time code is pushed to your repository and on a schedule.
+# The scheduled workflow runs every Sunday at 00:00 UTC.
+on:
+  push:
+  schedule:
+    - cron: '0 0 * * 0'
+
+jobs:
+  tests:
+    runs-on: ubuntu-latest
+    env:
+      GO111MODULE: on
+    steps:
+      - name: Checkout Source
+        uses: actions/checkout@v3
+      - name: Run Gosec Security Scanner
+        uses: securego/gosec@master
+        with:
+          # we let the report content trigger a failure using the GitHub Security features.
+          args: '-no-fail -fmt sarif -out results.sarif ./...'
+ - name: Upload SARIF file + uses: github/codeql-action/upload-sarif@v2 + with: + # Path to SARIF file relative to the root of the repository + sarif_file: results.sarif \ No newline at end of file diff --git a/core/backend/options.go b/core/backend/options.go index bc7fa5a4..143a9332 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -1,6 +1,7 @@ package backend import ( + "math/rand" "os" "path/filepath" @@ -33,12 +34,20 @@ func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []mode return opts } +func getSeed(c config.BackendConfig) int32 { + seed := int32(*c.Seed) + if seed == config.RAND_SEED { + seed = rand.Int31() + } + + return seed +} + func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { b := 512 if c.Batch != 0 { b = c.Batch } - return &pb.ModelOptions{ CUDA: c.CUDA || c.Diffusers.CUDA, SchedulerType: c.Diffusers.SchedulerType, @@ -54,7 +63,7 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { CLIPSkip: int32(c.Diffusers.ClipSkip), ControlNet: c.Diffusers.ControlNet, ContextSize: int32(*c.ContextSize), - Seed: int32(*c.Seed), + Seed: getSeed(c), NBatch: int32(b), NoMulMatQ: c.NoMulMatQ, DraftModel: c.DraftModel, @@ -129,7 +138,7 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption NKeep: int32(c.Keep), Batch: int32(c.Batch), IgnoreEOS: c.IgnoreEOS, - Seed: int32(*c.Seed), + Seed: getSeed(c), FrequencyPenalty: float32(c.FrequencyPenalty), MLock: *c.MMlock, MMap: *c.MMap, diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 9b227578..25edd343 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -4,7 +4,6 @@ import ( "errors" "fmt" "io/fs" - "math/rand" "os" "path/filepath" "sort" @@ -20,6 +19,10 @@ import ( "github.com/charmbracelet/glamour" ) +const ( + RAND_SEED = -1 +) + type BackendConfig struct { schema.PredictionOptions `yaml:"parameters"` Name string `yaml:"name"` @@ -218,7 +221,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { if cfg.Seed == nil { // random number generator seed - defaultSeed := int(rand.Int31()) + defaultSeed := RAND_SEED cfg.Seed = &defaultSeed } diff --git a/docs/content/docs/features/text-generation.md b/docs/content/docs/features/text-generation.md index 1d0e1e9e..c11894e7 100644 --- a/docs/content/docs/features/text-generation.md +++ b/docs/content/docs/features/text-generation.md @@ -304,6 +304,7 @@ The backend will automatically download the required files in order to run the m | Type | Description | | --- | --- | | `AutoModelForCausalLM` | `AutoModelForCausalLM` is a model that can be used to generate sequences. 
|
+| `OVModelForCausalLM` | for OpenVINO models |
 | N/A | Defaults to `AutoModel` |

@@ -324,4 +325,35 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
     "prompt": "Hello, my name is",
     "temperature": 0.1, "top_p": 0.1
 }'
+```
+
+#### Examples
+
+##### OpenVINO
+
+A model configuration file for OpenVINO and the Starling model:
+
+```yaml
+name: starling-openvino
+backend: transformers
+parameters:
+  model: fakezeta/Starling-LM-7B-beta-openvino-int8
+context_size: 8192
+threads: 6
+f16: true
+type: OVModelForCausalLM
+stopwords:
+- <|end_of_turn|>
+- <|endoftext|>
+prompt_cache_path: "cache"
+prompt_cache_all: true
+template:
+  chat_message: |
+    {{if eq .RoleName "system"}}{{.Content}}<|end_of_turn|>{{end}}{{if eq .RoleName "assistant"}}<|end_of_turn|>GPT4 Correct Assistant: {{.Content}}<|end_of_turn|>{{end}}{{if eq .RoleName "user"}}GPT4 Correct User: {{.Content}}{{end}}
+
+  chat: |
+    {{.Input}}<|end_of_turn|>GPT4 Correct Assistant:
+
+  completion: |
+    {{.Input}}
 ```
\ No newline at end of file
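A quick way to exercise the configuration above, as a sketch: assuming the server is listening on the default port 8080 and the `starling-openvino` definition has been loaded, a request follows the same shape as the other usage snippets in these docs:

```
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
  "model": "starling-openvino",
  "messages": [{"role": "user", "content": "How are you doing?"}],
  "temperature": 0.1
}'
```

diff --git a/tests/e2e-aio/e2e_suite_test.go b/tests/e2e-aio/e2e_suite_test.go
index fa61c408..0aa68230 100644
--- a/tests/e2e-aio/e2e_suite_test.go
+++ b/tests/e2e-aio/e2e_suite_test.go
@@ -23,6 +23,7 @@ var containerImageTag = os.Getenv("LOCALAI_IMAGE_TAG")
 var modelsDir = os.Getenv("LOCALAI_MODELS_DIR")
 var apiPort = os.Getenv("LOCALAI_API_PORT")
 var apiEndpoint = os.Getenv("LOCALAI_API_ENDPOINT")
+var apiKey = os.Getenv("LOCALAI_API_KEY")

 func TestLocalAI(t *testing.T) {
 	RegisterFailHandler(Fail)
@@ -38,11 +39,11 @@ var _ = BeforeSuite(func() {
 	var defaultConfig openai.ClientConfig
 	if apiEndpoint == "" {
 		startDockerImage()
-		defaultConfig = openai.DefaultConfig("")
+		defaultConfig = openai.DefaultConfig(apiKey)
 		defaultConfig.BaseURL = "http://localhost:" + apiPort + "/v1"
 	} else {
 		fmt.Println("Default ", apiEndpoint)
-		defaultConfig = openai.DefaultConfig("")
+		defaultConfig = openai.DefaultConfig(apiKey)
 		defaultConfig.BaseURL = apiEndpoint
 	}

From 3851b51d98ee6dce4e05aa6b045e53917b39f267 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 4 Apr 2024 00:27:57 +0200
Subject: [PATCH 0234/2895] :arrow_up: Update ggerganov/llama.cpp (#1953)

Signed-off-by: GitHub
Co-authored-by: mudler
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index a98eac67..019078a3 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai

 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be

-CPPLLAMA_VERSION?=f87f7b898651339fe173ddf016ca826163e899d8
+CPPLLAMA_VERSION?=60cdf40cc32f0ad4cb11e0ca8fd38f3b93d8d640

 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

From b85dad0286304993b4fd32f22006d30c6c2fd337 Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Thu, 4 Apr 2024 02:24:22 -0500
Subject: [PATCH 0235/2895] feat: first pass at improving logging (#1956)

Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---
 .vscode/extensions.json | 5 +++++
 core/config/application_config.go | 2 +-
 core/http/endpoints/openai/chat.go | 10 +++++-----
 core/http/endpoints/openai/files_test.go | 5 +++--
 core/services/backend_monitor.go | 10 +++++-----
 core/startup/config_file_watcher.go | 2 +-
 core/startup/startup.go | 6 +++---
 main.go | 10 +++++-----
 pkg/gallery/models.go | 2 +-
 pkg/model/initializers.go | 6 +++---
 pkg/model/watchdog.go | 8 ++++----
 pkg/startup/model_preload.go | 19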
+++++++++++-------- pkg/utils/config.go | 11 ++++++----- 13 files changed, 53 insertions(+), 43 deletions(-) create mode 100644 .vscode/extensions.json diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 00000000..7203cb3f --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,5 @@ +{ + "recommendations": [ + "golang.go" + ] +} \ No newline at end of file diff --git a/core/config/application_config.go b/core/config/application_config.go index 49b35f97..9525553a 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -169,7 +169,7 @@ func WithStringGalleries(galls string) AppOption { } var galleries []gallery.Gallery if err := json.Unmarshal([]byte(galls), &galleries); err != nil { - log.Error().Msgf("failed loading galleries: %s", err.Error()) + log.Error().Err(err).Msg("failed loading galleries") } o.Galleries = append(o.Galleries, galleries...) } diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 837b6e12..871ae6c1 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -84,7 +84,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup result, err := handleQuestion(config, req, ml, startupOptions, results[0].arguments, prompt) if err != nil { - log.Error().Msgf("error handling question: %s", err.Error()) + log.Error().Err(err).Msg("error handling question") return } @@ -268,7 +268,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData) if err != nil { - log.Error().Msgf("error processing message %+v using template \"%s\": %v. Skipping!", chatMessageData, config.TemplateConfig.ChatMessage, err) + log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping") } else { if templatedChatMessage == "" { log.Warn().Msgf("template \"%s\" produced blank output for %+v. 
Skipping!", config.TemplateConfig.ChatMessage, chatMessageData) @@ -455,7 +455,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup case noActionsToRun: result, err := handleQuestion(config, input, ml, startupOptions, results[0].arguments, predInput) if err != nil { - log.Error().Msgf("error handling question: %s", err.Error()) + log.Error().Err(err).Msg("error handling question") return } *c = append(*c, schema.Choice{ @@ -565,13 +565,13 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m predFunc, err := backend.ModelInference(input.Context, prompt, images, ml, *config, o, nil) if err != nil { - log.Error().Msgf("inference error: %s", err.Error()) + log.Error().Err(err).Msg("model inference failed") return "", err } prediction, err := predFunc() if err != nil { - log.Error().Msgf("inference error: %s", err.Error()) + log.Error().Err(err).Msg("prediction failed") return "", err } return backend.Finetune(*config, prompt, prediction.Response), nil diff --git a/core/http/endpoints/openai/files_test.go b/core/http/endpoints/openai/files_test.go index e1c1011e..fc77ae45 100644 --- a/core/http/endpoints/openai/files_test.go +++ b/core/http/endpoints/openai/files_test.go @@ -3,7 +3,6 @@ package openai import ( "encoding/json" "fmt" - "github.com/rs/zerolog/log" "io" "mime/multipart" "net/http" @@ -12,6 +11,8 @@ import ( "path/filepath" "strings" + "github.com/rs/zerolog/log" + "github.com/go-skynet/LocalAI/core/config" utils2 "github.com/go-skynet/LocalAI/pkg/utils" @@ -297,7 +298,7 @@ func responseToListFile(t *testing.T, resp *http.Response) ListFiles { err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&listFiles) if err != nil { - log.Error().Msgf("Failed to decode response: %s", err) + log.Error().Err(err).Msg("failed to decode response") } return listFiles diff --git a/core/services/backend_monitor.go b/core/services/backend_monitor.go index 88176753..979a67a3 100644 --- a/core/services/backend_monitor.go +++ b/core/services/backend_monitor.go @@ -63,7 +63,7 @@ func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.Backe pid, err := bm.modelLoader.GetGRPCPID(backend) if err != nil { - log.Error().Msgf("model %s : failed to find pid %+v", model, err) + log.Error().Err(err).Str("model", model).Msg("failed to find GRPC pid") return nil, err } @@ -71,26 +71,26 @@ func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.Backe backendProcess, err := gopsutil.NewProcess(int32(pid)) if err != nil { - log.Error().Msgf("model %s [PID %d] : error getting process info %+v", model, pid, err) + log.Error().Err(err).Str("model", model).Int("pid", pid).Msg("error getting process info") return nil, err } memInfo, err := backendProcess.MemoryInfo() if err != nil { - log.Error().Msgf("model %s [PID %d] : error getting memory info %+v", model, pid, err) + log.Error().Err(err).Str("model", model).Int("pid", pid).Msg("error getting memory info") return nil, err } memPercent, err := backendProcess.MemoryPercent() if err != nil { - log.Error().Msgf("model %s [PID %d] : error getting memory percent %+v", model, pid, err) + log.Error().Err(err).Str("model", model).Int("pid", pid).Msg("error getting memory percent") return nil, err } cpuPercent, err := backendProcess.CPUPercent() if err != nil { - log.Error().Msgf("model %s [PID %d] : error getting cpu percent %+v", model, pid, err) + log.Error().Err(err).Str("model", model).Int("pid", pid).Msg("error getting cpu percent") return nil, err } diff 
--git a/core/startup/config_file_watcher.go b/core/startup/config_file_watcher.go index 0c7eff2d..9c758e25 100644 --- a/core/startup/config_file_watcher.go +++ b/core/startup/config_file_watcher.go @@ -85,7 +85,7 @@ func WatchConfigDirectory(configDir string, appConfig *config.ApplicationConfig) if !ok { return } - log.Error().Msgf("WatchConfigDirectory goroutine error: %+v", err) + log.Error().Err(err).Msg("error encountered while watching config directory") } } }() diff --git a/core/startup/startup.go b/core/startup/startup.go index 828eb7a7..6298f034 100644 --- a/core/startup/startup.go +++ b/core/startup/startup.go @@ -61,17 +61,17 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode configLoaderOpts := options.ToConfigLoaderOptions() if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil { - log.Error().Msgf("error loading config files: %s", err.Error()) + log.Error().Err(err).Msg("error loading config files") } if options.ConfigFile != "" { if err := cl.LoadBackendConfigFile(options.ConfigFile, configLoaderOpts...); err != nil { - log.Error().Msgf("error loading config file: %s", err.Error()) + log.Error().Err(err).Msg("error loading config file") } } if err := cl.Preload(options.ModelPath); err != nil { - log.Error().Msgf("error downloading models: %s", err.Error()) + log.Error().Err(err).Msg("error downloading models") } if options.PreloadJSONModels != "" { diff --git a/main.go b/main.go index 0d8befcb..53966ba5 100644 --- a/main.go +++ b/main.go @@ -45,7 +45,7 @@ func main() { path, err := os.Getwd() if err != nil { - log.Error().Msgf("error: %s", err.Error()) + log.Error().Err(err).Msg("failed to get current directory") os.Exit(1) } @@ -340,7 +340,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit appHTTP, err := http.App(cl, ml, options) if err != nil { - log.Error().Msg("Error during HTTP App constructor") + log.Error().Err(err).Msg("error during HTTP App construction") return err } @@ -357,7 +357,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit Action: func(ctx *cli.Context) error { var galleries []gallery.Gallery if err := json.Unmarshal([]byte(ctx.String("galleries")), &galleries); err != nil { - log.Error().Msgf("unable to load galleries: %s", err.Error()) + log.Error().Err(err).Msg("unable to load galleries") } models, err := gallery.AvailableGalleryModels(galleries, ctx.String("models-path")) @@ -382,7 +382,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit var galleries []gallery.Gallery if err := json.Unmarshal([]byte(ctx.String("galleries")), &galleries); err != nil { - log.Error().Msgf("unable to load galleries: %s", err.Error()) + log.Error().Err(err).Msg("unable to load galleries") } progressBar := progressbar.NewOptions( @@ -547,7 +547,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit err = app.Run(os.Args) if err != nil { - log.Error().Msgf("error: %s", err.Error()) + log.Error().Err(err).Msg("application runtime error") os.Exit(1) } } diff --git a/pkg/gallery/models.go b/pkg/gallery/models.go index 65d0401f..10caedee 100644 --- a/pkg/gallery/models.go +++ b/pkg/gallery/models.go @@ -67,7 +67,7 @@ func GetGalleryConfigFromURL(url string) (Config, error) { return yaml.Unmarshal(d, &config) }) if err != nil { - log.Error().Msgf("GetGalleryConfigFromURL error for url %s\n%s", url, err.Error()) + log.Error().Err(err).Str("url", url).Msg("failed to get gallery 
config for url") return config, err } return config, nil diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 85744f9a..5d9808a4 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -15,8 +15,8 @@ import ( ) var Aliases map[string]string = map[string]string{ - "go-llama": LLamaCPP, - "llama": LLamaCPP, + "go-llama": LLamaCPP, + "llama": LLamaCPP, "embedded-store": LocalStoreBackend, } @@ -127,7 +127,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string break } if err != nil && i == o.grpcAttempts-1 { - log.Error().Msgf("Failed starting/connecting to the gRPC service: %s", err.Error()) + log.Error().Err(err).Msg("failed starting/connecting to the gRPC service") } time.Sleep(time.Duration(o.grpcAttemptsDelay) * time.Second) } diff --git a/pkg/model/watchdog.go b/pkg/model/watchdog.go index c93cb99a..b5381832 100644 --- a/pkg/model/watchdog.go +++ b/pkg/model/watchdog.go @@ -110,10 +110,10 @@ func (wd *WatchDog) checkIdle() { log.Debug().Msgf("[WatchDog] %s: idle connection", address) if time.Since(t) > wd.idletimeout { log.Warn().Msgf("[WatchDog] Address %s is idle for too long, killing it", address) - p, ok := wd.addressModelMap[address] + model, ok := wd.addressModelMap[address] if ok { - if err := wd.pm.ShutdownModel(p); err != nil { - log.Error().Msgf("[watchdog] Error shutting down model %s: %v", p, err) + if err := wd.pm.ShutdownModel(model); err != nil { + log.Error().Err(err).Str("model", model).Msg("[watchdog] error shutting down model") } log.Debug().Msgf("[WatchDog] model shut down: %s", address) delete(wd.idleTime, address) @@ -141,7 +141,7 @@ func (wd *WatchDog) checkBusy() { if ok { log.Warn().Msgf("[WatchDog] Model %s is busy for too long, killing it", model) if err := wd.pm.ShutdownModel(model); err != nil { - log.Error().Msgf("[watchdog] Error shutting down model %s: %v", model, err) + log.Error().Err(err).Str("model", model).Msg("[watchdog] error shutting down model") } log.Debug().Msgf("[WatchDog] model shut down: %s", address) delete(wd.timetable, address) diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go index 979b4d83..b09516a7 100644 --- a/pkg/startup/model_preload.go +++ b/pkg/startup/model_preload.go @@ -35,14 +35,15 @@ func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, model modelYAML, err := embedded.ResolveContent(url) // If we resolve something, just save it to disk and continue if err != nil { - log.Error().Msgf("error loading model: %s", err.Error()) + log.Error().Err(err).Msg("error resolving model content") continue } log.Debug().Msgf("[startup] resolved embedded model: %s", url) md5Name := utils.MD5(url) - if err := os.WriteFile(filepath.Join(modelPath, md5Name)+".yaml", modelYAML, os.ModePerm); err != nil { - log.Error().Msgf("error loading model: %s", err.Error()) + modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml" + if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil { + log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition") } case downloader.LooksLikeURL(url): log.Debug().Msgf("[startup] resolved model to download: %s", url) @@ -52,11 +53,12 @@ func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, model // check if file exists if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) { - err := downloader.DownloadFile(url, filepath.Join(modelPath, md5Name)+".yaml", "", func(fileName, 
current, total string, percent float64) {
 				utils.DisplayDownloadFunction(fileName, current, total, percent)
 			})
 			if err != nil {
-				log.Error().Msgf("error loading model: %s", err.Error())
+				log.Error().Err(err).Str("url", url).Str("filepath", modelDefinitionFilePath).Msg("error downloading model")
 			}
 		}
 	default:
@@ -67,12 +69,13 @@ func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, model

 			modelYAML, err := os.ReadFile(url)
 			if err != nil {
-				log.Error().Msgf("error loading model: %s", err.Error())
+				log.Error().Err(err).Str("filepath", url).Msg("error reading model definition")
 				continue
 			}

-			if err := os.WriteFile(filepath.Join(modelPath, md5Name)+".yaml", modelYAML, os.ModePerm); err != nil {
-				log.Error().Msgf("error loading model: %s", err.Error())
+			modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+			if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
+				log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
 			}
 		} else {
 			log.Warn().Msgf("[startup] failed resolving model '%s'", url)
diff --git a/pkg/utils/config.go b/pkg/utils/config.go
index a9167ed3..929e1f9f 100644
--- a/pkg/utils/config.go
+++ b/pkg/utils/config.go
@@ -2,21 +2,22 @@ package utils

 import (
 	"encoding/json"
-	"github.com/rs/zerolog/log"
 	"os"
 	"path/filepath"
+
+	"github.com/rs/zerolog/log"
 )

 func SaveConfig(filePath, fileName string, obj any) {
 	file, err := json.MarshalIndent(obj, "", " ")
 	if err != nil {
-		log.Error().Msgf("Failed to JSON marshal the uploadedFiles: %s", err)
+		log.Error().Err(err).Msg("failed to JSON marshal the uploadedFiles")
 	}

 	absolutePath := filepath.Join(filePath, fileName)
 	err = os.WriteFile(absolutePath, file, 0644)
 	if err != nil {
-		log.Error().Msgf("Failed to save configuration file to %s: %s", absolutePath, err)
+		log.Error().Err(err).Str("filepath", absolutePath).Msg("failed to save configuration file")
 	}
 }

@@ -31,11 +32,11 @@ func LoadConfig(filePath, fileName string, obj interface{}) {

 	file, err := os.ReadFile(uploadFilePath)
 	if err != nil {
-		log.Error().Msgf("Failed to read file: %s", err)
+		log.Error().Err(err).Str("filepath", uploadFilePath).Msg("failed to read file")
 	} else {
 		err = json.Unmarshal(file, &obj)
 		if err != nil {
-			log.Error().Msgf("Failed to JSON unmarshal the file %s: %v", uploadFilePath, err)
+			log.Error().Err(err).Str("filepath", uploadFilePath).Msg("failed to parse file as JSON")
 		}
 	}
 }
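The logging change above in one compact sketch, assuming nothing beyond the github.com/rs/zerolog dependency the project already uses: instead of interpolating values into the message with Msgf, the error and its context travel as typed fields, so the message stays constant and log processors can filter on fields such as "model".

```go
package main

import (
	"errors"

	"github.com/rs/zerolog/log"
)

func main() {
	err := errors.New("connection refused")

	// Before: context is baked into the format string, so every
	// occurrence produces a slightly different message.
	log.Error().Msgf("model %s : failed to find pid %+v", "phi-2", err)

	// After: the error and the model name are structured fields
	// attached to a constant message. ("phi-2" is just a placeholder.)
	log.Error().Err(err).Str("model", "phi-2").Msg("failed to find GRPC pid")
}
```

From f744e1f9317cf66f8812109d1601b0db07dd9883 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Fri, 5 Apr 2024 08:41:35 +0200
Subject: [PATCH 0236/2895] :arrow_up: Update ggerganov/whisper.cpp (#1958)

Signed-off-by: GitHub
Co-authored-by: mudler
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 019078a3..dab1a0cb 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

 # whisper.cpp version
-WHISPER_CPP_VERSION?=ac283dbce7d42735e3ed985329037bf23fe180aa
+WHISPER_CPP_VERSION?=1d7657f40974e251ea42275e155a8abfb24228ef

 # bert.cpp version
 BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d

From b2d9e3f7044a3c4853274609012e96670ce45bec Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]"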
<139863280+localai-bot@users.noreply.github.com>
Date: Fri, 5 Apr 2024 08:41:55 +0200
Subject: [PATCH 0237/2895] :arrow_up: Update ggerganov/llama.cpp (#1959)

Signed-off-by: GitHub
Co-authored-by: mudler
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index dab1a0cb..08e2ef3e 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai

 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be

-CPPLLAMA_VERSION?=60cdf40cc32f0ad4cb11e0ca8fd38f3b93d8d640
+CPPLLAMA_VERSION?=a307375c02cac45cff53cf2520330b43fecc7718

 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

From 8aa5f5a660987553452fb6b160281b5c573e579f Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sat, 6 Apr 2024 21:15:25 +0200
Subject: [PATCH 0238/2895] :arrow_up: Update ggerganov/llama.cpp (#1960)

Signed-off-by: GitHub
Co-authored-by: mudler
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 08e2ef3e..938ee989 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai

 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be

-CPPLLAMA_VERSION?=a307375c02cac45cff53cf2520330b43fecc7718
+CPPLLAMA_VERSION?=a8bd14d55717754a1f48313a846a2b16fa998ad2

 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

From 83425532144fbc4fbc9aa734aab8f822a0a2ddf4 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 6 Apr 2024 22:56:45 +0200
Subject: [PATCH 0239/2895] fix(llama.cpp): set better defaults for llama.cpp
 (#1961)

fix(defaults): set better defaults for llama.cpp

Signed-off-by: Ettore Di Giacinto
---
 core/backend/options.go | 4 ++--
 core/config/backend_config.go | 15 +++++++++++++--
 core/http/endpoints/openai/request.go | 6 +++---
 core/schema/prediction.go | 10 +++++-----
 4 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/core/backend/options.go b/core/backend/options.go
index 143a9332..5b303b05 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -144,7 +144,7 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption
 		MMap: *c.MMap,
 		MainGPU: c.MainGPU,
 		TensorSplit: c.TensorSplit,
-		TailFreeSamplingZ: float32(c.TFZ),
-		TypicalP: float32(c.TypicalP),
+		TailFreeSamplingZ: float32(*c.TFZ),
+		TypicalP: float32(*c.TypicalP),
 	}
 }
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index 25edd343..a90b1c1b 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -205,13 +205,16 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 	threads := lo.threads
 	f16 := lo.f16
 	debug := lo.debug
-	defaultTopP := 0.7
-	defaultTopK := 80
+	// https://github.com/ggerganov/llama.cpp/blob/75cd4c77292034ecec587ecb401366f57338f7c0/common/sampling.h#L22
+	defaultTopP := 0.95
+	defaultTopK := 40
 	defaultTemp := 0.9
 	defaultMaxTokens := 2048
 	defaultMirostat := 2
 	defaultMirostatTAU := 5.0
 	defaultMirostatETA := 0.1
+	defaultTypicalP := 1.0
+	defaultTFZ := 1.0

 	// Try to offload all GPU layers (if GPU is found)
 	defaultNGPULayers := 99999999
@@ -229,6 +232,14 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 		cfg.TopK = &defaultTopK
 	}

+	if cfg.TypicalP == nil {
+		cfg.TypicalP = &defaultTypicalP
+	}
+
+	if cfg.TFZ == nil {
+		cfg.TFZ = &defaultTFZ
+	}
+
 	if cfg.MMap == nil {
 		// MMap is enabled by default
 		cfg.MMap = &trueV
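The hunk just above is the user-visible part of this change: requests that do not set top_p/top_k now sample with top_p 0.95 and top_k 40, matching upstream llama.cpp, instead of the old 0.7/80. As a sketch, in the same style as the usage examples elsewhere in this series, a caller that wants the previous behaviour can pin the values per request:

```
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
  "model": "gpt-4",
  "messages": [{"role": "user", "content": "How are you doing?"}],
  "top_p": 0.7,
  "top_k": 80
}'
```

diff --git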
a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go
index c9981204..369fb0b8 100644
--- a/core/http/endpoints/openai/request.go
+++ b/core/http/endpoints/openai/request.go
@@ -192,11 +192,11 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
 		config.RepeatPenalty = input.RepeatPenalty
 	}

-	if input.FrequencyPenalty!= 0 {
+	if input.FrequencyPenalty != 0 {
 		config.FrequencyPenalty = input.FrequencyPenalty
 	}

-	if input.PresencePenalty!= 0 {
+	if input.PresencePenalty != 0 {
 		config.PresencePenalty = input.PresencePenalty
 	}

@@ -216,7 +216,7 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
 		config.Seed = input.Seed
 	}

-	if input.TypicalP != 0 {
+	if input.TypicalP != nil {
 		config.TypicalP = input.TypicalP
 	}

diff --git a/core/schema/prediction.go b/core/schema/prediction.go
index 4933f2d2..7e509167 100644
--- a/core/schema/prediction.go
+++ b/core/schema/prediction.go
@@ -24,12 +24,12 @@ type PredictionOptions struct {
 	RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
 	Keep int `json:"n_keep" yaml:"n_keep"`

-	FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
-	PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"`
-	TFZ float64 `json:"tfz" yaml:"tfz"`
+	FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
+	PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"`
+	TFZ *float64 `json:"tfz" yaml:"tfz"`

-	TypicalP float64 `json:"typical_p" yaml:"typical_p"`
-	Seed *int `json:"seed" yaml:"seed"`
+	TypicalP *float64 `json:"typical_p" yaml:"typical_p"`
+	Seed *int `json:"seed" yaml:"seed"`

 	NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"`
 	RopeFreqBase float32 `json:"rope_freq_base" yaml:"rope_freq_base"`

From ed1378298677ebdc53f2c5e930941ca8e92e90ae Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sun, 7 Apr 2024 10:32:10 +0200
Subject: [PATCH 0240/2895] :arrow_up: Update ggerganov/llama.cpp (#1964)

Signed-off-by: GitHub
Co-authored-by: mudler
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 938ee989..290a4adc 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai

 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be

-CPPLLAMA_VERSION?=a8bd14d55717754a1f48313a846a2b16fa998ad2
+CPPLLAMA_VERSION?=54ea0698fbf87e36a5d68a98c95f6bdd0fb91557

 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

From 74492a81c70603547a717e45d00d532cd2017244 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 7 Apr 2024 11:06:35 +0200
Subject: [PATCH 0241/2895] doc(quickstart): fix typo

Signed-off-by: Ettore Di Giacinto
---
 docs/content/docs/getting-started/quickstart.md | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md
index c56dced5..ff1dc6a7 100644
--- a/docs/content/docs/getting-started/quickstart.md
+++ b/docs/content/docs/getting-started/quickstart.md
@@ -61,10 +61,14 @@ Or with a docker-compose file:
 version: "3.9"
 services:
   api:
-    image: localai/localai:{{< version >}}-aio-cpu
+    image: localai/localai:latest-aio-cpu
+    # For a specific version:
+    # image: localai/localai:{{< version >}}-aio-cpu
     # For Nvidia GPUs uncomment one of the following (cuda11 or cuda12):
-    # image: localai/localai:{{< version
>}}-aio-gpu-cuda-11 - # image: localai/localai:{{< version >}}-aio-gpu-cuda-12 + # image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-11 + # image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-12 + # image: localai/localai:latest-aio-gpu-nvidia-cuda-11 + # image: localai/localai:latest-aio-gpu-nvidia-cuda-12 healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"] interval: 1m From f36d86ba6db1507814ae11c169a073d1a84d3b4f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 7 Apr 2024 18:23:47 +0200 Subject: [PATCH 0242/2895] fix(hermes-2-pro-mistral): correct dashes in template to suppress newlines (#1966) Signed-off-by: Ettore Di Giacinto --- aio/cpu/text-to-text.yaml | 21 ++++++++++++--------- aio/gpu-8g/text-to-text.yaml | 21 ++++++++++++--------- aio/intel/text-to-text.yaml | 21 ++++++++++++--------- embedded/models/hermes-2-pro-mistral.yaml | 21 ++++++++++++--------- 4 files changed, 48 insertions(+), 36 deletions(-) diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml index 8a20109d..6c4ec9e6 100644 --- a/aio/cpu/text-to-text.yaml +++ b/aio/cpu/text-to-text.yaml @@ -6,12 +6,14 @@ parameters: template: chat_message: | <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} - {{ if .FunctionCall }}{{end}} - {{ if eq .RoleName "tool" }}{{end}} - {{if .Content}}{{.Content}}{{end}} - {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}} - {{ if .FunctionCall }}{{end}} - {{ if eq .RoleName "tool" }}{{end}} + {{- if .FunctionCall }}{{end}} + {{- if eq .RoleName "tool" }}{{end }} + {{- if .Content}} + {{.Content}} + {{- end }} + {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }} + {{- if .FunctionCall }}{{end }} + {{- if eq .RoleName "tool" }}{{end }} <|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | @@ -27,12 +29,13 @@ template: For each function call return a json object with function name and arguments within XML tags as follows: {'arguments': , 'name': } - <|im_end|> - {{.Input}} + + <|im_end|> + {{.Input -}} <|im_start|>assistant chat: | - {{.Input}} + {{.Input -}} <|im_start|>assistant completion: | {{.Input}} diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml index 9502cdfe..8d5c84f7 100644 --- a/aio/gpu-8g/text-to-text.yaml +++ b/aio/gpu-8g/text-to-text.yaml @@ -6,12 +6,14 @@ parameters: template: chat_message: | <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} - {{ if .FunctionCall }}{{end}} - {{ if eq .RoleName "tool" }}{{end}} - {{if .Content}}{{.Content}}{{end}} - {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}} - {{ if .FunctionCall }}{{end}} - {{ if eq .RoleName "tool" }}{{end}} + {{- if .FunctionCall }}{{end}} + {{- if eq .RoleName "tool" }}{{end }} + {{- if .Content}} + {{.Content}} + {{- end }} + {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }} + {{- if .FunctionCall }}{{end }} + {{- if eq .RoleName "tool" }}{{end }} <|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | @@ -27,12 +29,13 @@ template: For each function call return a json object with function name and arguments within XML tags as follows: {'arguments': , 'name': } - <|im_end|> - {{.Input}} + + <|im_end|> + {{.Input -}} <|im_start|>assistant chat: | - 
{{.Input}} + {{.Input -}} <|im_start|>assistant completion: | {{.Input}} diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml index d2316745..a7cb5b4d 100644 --- a/aio/intel/text-to-text.yaml +++ b/aio/intel/text-to-text.yaml @@ -7,12 +7,14 @@ parameters: template: chat_message: | <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} - {{ if .FunctionCall }}{{end}} - {{ if eq .RoleName "tool" }}{{end}} - {{if .Content}}{{.Content}}{{end}} - {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}} - {{ if .FunctionCall }}{{end}} - {{ if eq .RoleName "tool" }}{{end}} + {{- if .FunctionCall }}{{end}} + {{- if eq .RoleName "tool" }}{{end }} + {{- if .Content}} + {{.Content}} + {{- end }} + {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }} + {{- if .FunctionCall }}{{end }} + {{- if eq .RoleName "tool" }}{{end }} <|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | @@ -28,12 +30,13 @@ template: For each function call return a json object with function name and arguments within XML tags as follows: {'arguments': , 'name': } - <|im_end|> - {{.Input}} + + <|im_end|> + {{.Input -}} <|im_start|>assistant chat: | - {{.Input}} + {{.Input -}} <|im_start|>assistant completion: | {{.Input}} diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml index eb75b97c..7bfa9418 100644 --- a/embedded/models/hermes-2-pro-mistral.yaml +++ b/embedded/models/hermes-2-pro-mistral.yaml @@ -6,12 +6,14 @@ parameters: template: chat_message: | <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} - {{ if .FunctionCall }}{{end}} - {{ if eq .RoleName "tool" }}{{end}} - {{if .Content}}{{.Content}}{{end}} - {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}} - {{ if .FunctionCall }}{{end}} - {{ if eq .RoleName "tool" }}{{end}} + {{- if .FunctionCall }}{{end}} + {{- if eq .RoleName "tool" }}{{end }} + {{- if .Content}} + {{.Content}} + {{- end }} + {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }} + {{- if .FunctionCall }}{{end }} + {{- if eq .RoleName "tool" }}{{end }} <|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | @@ -27,12 +29,13 @@ template: For each function call return a json object with function name and arguments within XML tags as follows: {'arguments': , 'name': } - <|im_end|> - {{.Input}} + + <|im_end|> + {{.Input -}} <|im_start|>assistant chat: | - {{.Input}} + {{.Input -}} <|im_start|>assistant completion: | {{.Input}} From a153b628c2444fa24cb901ef27f3fe0cb3fcde17 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 8 Apr 2024 08:38:17 +0200 Subject: [PATCH 0243/2895] :arrow_up: Update ggerganov/whisper.cpp (#1969) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 290a4adc..d11a47bc 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=1d7657f40974e251ea42275e155a8abfb24228ef +WHISPER_CPP_VERSION?=13c22321d1ac758ce68a429c23104e234b440769 # bert.cpp version 
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From efcca15d3f9db3da6ceecf1c49224674dcd8f13f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 8 Apr 2024 08:38:47 +0200 Subject: [PATCH 0244/2895] :arrow_up: Update ggerganov/llama.cpp (#1970) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d11a47bc..10791a4b 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=54ea0698fbf87e36a5d68a98c95f6bdd0fb91557 +CPPLLAMA_VERSION?=855f54402e866ed19d8d675b56a81c844c64b325 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From a38618db0278b9460688e4f6275009d3c601a2bc Mon Sep 17 00:00:00 2001 From: fakezeta Date: Mon, 8 Apr 2024 22:33:51 +0200 Subject: [PATCH 0245/2895] fix regression #1971 (#1972) fixes regression #1971 introduced by intel_extension_for_transformers==1.4 --- backend/python/transformers/transformers_server.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index 04324d9b..c7f1cd75 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -22,11 +22,7 @@ import torch.cuda XPU=os.environ.get("XPU", "0") == "1" if XPU: - import intel_extension_for_pytorch as ipex - from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer - from optimum.intel.openvino import OVModelForCausalLM - from openvino.runtime import Core else: from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig, TextIteratorStreamer @@ -115,6 +111,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): try: if request.Type == "AutoModelForCausalLM": if XPU: + import intel_extension_for_pytorch as ipex + from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM + device_map="xpu" compute=torch.float16 if request.Quantization == "xpu_4bit": @@ -141,6 +140,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): device_map=device_map, torch_dtype=compute) elif request.Type == "OVModelForCausalLM": + from optimum.intel.openvino import OVModelForCausalLM + from openvino.runtime import Core + if "GPU" in Core().available_devices: device_map="GPU" else: From 195be10050d552d83e3f0729b367d0359edf60d9 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 8 Apr 2024 23:26:52 +0200 Subject: [PATCH 0246/2895] :arrow_up: Update ggerganov/llama.cpp (#1973) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 10791a4b..d42e1a99 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=855f54402e866ed19d8d675b56a81c844c64b325 +CPPLLAMA_VERSION?=cc4a95426d17417d3c83f12bdb514fbe8abe2a88 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 2bbb221fb18cb119c384f86739c1433cec8a491b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 8 Apr 2024 21:28:59 +0000 
Subject: [PATCH 0247/2895] tests(petals): temp disable --- .github/workflows/test-extra.yml | 52 ++++++++++++++++---------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index 6f92c806..7689f06d 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -133,34 +133,34 @@ jobs: - tests-petals: - runs-on: ubuntu-latest - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install build-essential ffmpeg - curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ - sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ - gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ - sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \ - sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ - sudo apt-get update && \ - sudo apt-get install -y conda - sudo apt-get install -y ca-certificates cmake curl patch - sudo apt-get install -y libopencv-dev + # tests-petals: + # runs-on: ubuntu-latest + # steps: + # - name: Clone + # uses: actions/checkout@v4 + # with: + # submodules: true + # - name: Dependencies + # run: | + # sudo apt-get update + # sudo apt-get install build-essential ffmpeg + # curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ + # sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ + # gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ + # sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \ + # sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ + # sudo apt-get update && \ + # sudo apt-get install -y conda + # sudo apt-get install -y ca-certificates cmake curl patch + # sudo apt-get install -y libopencv-dev - sudo rm -rfv /usr/bin/conda || true + # sudo rm -rfv /usr/bin/conda || true - - name: Test petals - run: | - export PATH=$PATH:/opt/conda/bin - make --jobs=5 --output-sync=target -C backend/python/petals - make --jobs=5 --output-sync=target -C backend/python/petals test + # - name: Test petals + # run: | + # export PATH=$PATH:/opt/conda/bin + # make --jobs=5 --output-sync=target -C backend/python/petals + # make --jobs=5 --output-sync=target -C backend/python/petals test From cc3d601836891fc4694745929f90204c684b4152 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 9 Apr 2024 09:49:11 +0200 Subject: [PATCH 0248/2895] ci: fixup latest image push Signed-off-by: Ettore Di Giacinto --- .github/workflows/image_build.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/image_build.yml 
b/.github/workflows/image_build.yml index cba78933..bd244dcf 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -282,6 +282,7 @@ jobs: docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }} docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }} + docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image }} - name: Latest AIO tag # run this on branches, when it is a tag and there is a latest-image defined if: github.event_name != 'pull_request' && inputs.latest-image-aio != '' && github.ref_type == 'tag' @@ -290,7 +291,8 @@ jobs: docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }} docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }} - + docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }} + - name: job summary run: | echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY From aeb3f835aef7c80da7ad2ccae433d11449493061 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 10 Apr 2024 09:07:21 +0200 Subject: [PATCH 0249/2895] :arrow_up: Update docs version mudler/LocalAI (#1978) Signed-off-by: GitHub Co-authored-by: mudler --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index b6372479..cc0478ca 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.11.0" + "version": "v2.12.1" } From 951e39d36c06bb14b3b95b27309d7be809f4a3f4 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 10 Apr 2024 09:07:41 +0200 Subject: [PATCH 0250/2895] :arrow_up: Update ggerganov/llama.cpp (#1979) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d42e1a99..b43541ff 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=cc4a95426d17417d3c83f12bdb514fbe8abe2a88 +CPPLLAMA_VERSION?=1b67731e184e27a465b8c5476061294a4af668ea # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 7e2f8bb4083eea3939072dab2cb47261b1b97603 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 10 Apr 2024 09:08:00 +0200 Subject: [PATCH 0251/2895] :arrow_up: Update ggerganov/whisper.cpp (#1980) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b43541ff..337ebc64 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=13c22321d1ac758ce68a429c23104e234b440769 +WHISPER_CPP_VERSION?=8f253ef3af1c62c04316ba4afa7145fc4d701a8c # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From d692b2c32a400a4aa0c6df9a51aa4f3cbe73edff Mon Sep 17 00:00:00 2001 From: Ettore Di 
Giacinto Date: Wed, 10 Apr 2024 10:31:59 +0200 Subject: [PATCH 0252/2895] ci: push latest images for dockerhub (#1984) Fixes: #1983 Signed-off-by: Ettore Di Giacinto --- .github/workflows/image_build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index bd244dcf..b0684a4c 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -280,6 +280,7 @@ jobs: run: | docker pull localai/localai:${{ steps.meta.outputs.version }} docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }} + docker push localai/localai:${{ inputs.latest-image }} docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }} docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image }} @@ -289,6 +290,7 @@ jobs: run: | docker pull localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }} + docker push localai/localai:${{ inputs.latest-image-aio }} docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }} docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }} From d23e73b11828b59a608174dc679eb5a3c2d2f42f Mon Sep 17 00:00:00 2001 From: "Sebastian.W" Date: Wed, 10 Apr 2024 18:36:10 +0800 Subject: [PATCH 0253/2895] fix(autogptq): do not use_triton with qwen-vl (#1985) * Enhance autogptq backend to support VL models * update dependencies for autogptq * remove redundant auto-gptq dependency * Convert base64 to image_url for Qwen-VL model * implemented model inference for qwen-vl * remove user prompt from generated answer * fixed write image error * fixed use_triton issue when loading Qwen-VL model --------- Co-authored-by: Binghua Wu --- backend/python/autogptq/autogptq.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/python/autogptq/autogptq.py b/backend/python/autogptq/autogptq.py index bbafdd92..c7c35028 100755 --- a/backend/python/autogptq/autogptq.py +++ b/backend/python/autogptq/autogptq.py @@ -39,7 +39,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): self.model_name = "Qwen-VL-Chat" model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=request.TrustRemoteCode, - use_triton=request.UseTriton, device_map="auto").eval() else: model = AutoGPTQForCausalLM.from_quantized(model_path, From 36da11a0ee4cdf575012c669150d5f617362e619 Mon Sep 17 00:00:00 2001 From: Koen Farell Date: Wed, 10 Apr 2024 14:25:26 +0300 Subject: [PATCH 0254/2895] deps: Update version of vLLM to add support of Cohere Command_R model in vLLM inference (#1975) * Update vLLM version to add support of Command_R Signed-off-by: Koen Farell * fix: Fixed vllm version from requirements Signed-off-by: Koen Farell * chore: Update transformers-rocm.yml Signed-off-by: Koen Farell * chore: Update transformers.yml version of vllm Signed-off-by: Koen Farell --------- Signed-off-by: Koen Farell --- backend/python/common-env/transformers/transformers-nvidia.yml | 2 +- backend/python/common-env/transformers/transformers-rocm.yml | 2 +- backend/python/common-env/transformers/transformers.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff 
--git a/backend/python/common-env/transformers/transformers-nvidia.yml b/backend/python/common-env/transformers/transformers-nvidia.yml index e8d8155b..e12b5dbb 100644 --- a/backend/python/common-env/transformers/transformers-nvidia.yml +++ b/backend/python/common-env/transformers/transformers-nvidia.yml @@ -116,7 +116,7 @@ dependencies: - sudachipy - sudachidict_core - vocos - - vllm==0.3.2 + - vllm>=0.4.0 - transformers>=4.38.2 # Updated Version - transformers_stream_generator==0.0.5 - xformers==0.0.23.post1 diff --git a/backend/python/common-env/transformers/transformers-rocm.yml b/backend/python/common-env/transformers/transformers-rocm.yml index fa245bf4..48fac8bf 100644 --- a/backend/python/common-env/transformers/transformers-rocm.yml +++ b/backend/python/common-env/transformers/transformers-rocm.yml @@ -104,7 +104,7 @@ dependencies: - sudachipy - sudachidict_core - vocos - - vllm==0.3.2 + - vllm>=0.4.0 - transformers>=4.38.2 # Updated Version - transformers_stream_generator==0.0.5 - xformers==0.0.23.post1 diff --git a/backend/python/common-env/transformers/transformers.yml b/backend/python/common-env/transformers/transformers.yml index 3b3b8fe7..843b13fa 100644 --- a/backend/python/common-env/transformers/transformers.yml +++ b/backend/python/common-env/transformers/transformers.yml @@ -108,7 +108,7 @@ dependencies: - sudachipy - sudachidict_core - vocos - - vllm==0.3.2 + - vllm>=0.4.0 - transformers>=4.38.2 # Updated Version - transformers_stream_generator==0.0.5 - xformers==0.0.23.post1 From 93f51d80d41b3b3748da41ad4cb7baf8c762890c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 10 Apr 2024 16:29:46 +0200 Subject: [PATCH 0255/2895] Update gpt-vision.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/features/gpt-vision.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/content/docs/features/gpt-vision.md b/docs/content/docs/features/gpt-vision.md index 3afcab16..827e2c08 100644 --- a/docs/content/docs/features/gpt-vision.md +++ b/docs/content/docs/features/gpt-vision.md @@ -22,6 +22,17 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' ``` +Grammars and function tools can be used as well in conjunction with vision APIs: + +```bash + curl http://10.1.0.36:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "llava", "grammar": "root ::= (\"yes\" | \"no\")", + "messages": [{"role": "user", "content": [{"type":"text", "text": "Is there some grass in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' +``` + ### Setup +All-in-One images have already shipped the llava model as `gpt-4-vision-preview`, so no setup is needed in this case. + To setup the LLaVa models, follow the full example in the [configuration examples](https://github.com/mudler/LocalAI/blob/master/examples/configurations/README.md#llava). 
+ From 636d487dc84c6f1d99ba7630d8851865091c42cb Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 10 Apr 2024 16:30:03 +0200 Subject: [PATCH 0256/2895] Update gpt-vision.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/features/gpt-vision.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/docs/features/gpt-vision.md b/docs/content/docs/features/gpt-vision.md index 827e2c08..9e021273 100644 --- a/docs/content/docs/features/gpt-vision.md +++ b/docs/content/docs/features/gpt-vision.md @@ -25,7 +25,7 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso Grammars and function tools can be used as well in conjunction with vision APIs: ```bash - curl http://10.1.0.36:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ "model": "llava", "grammar": "root ::= (\"yes\" | \"no\")", "messages": [{"role": "user", "content": [{"type":"text", "text": "Is there some grass in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' ``` From 92005b9c0285f31e7f29ca4f37e6afa194745cf0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 10 Apr 2024 16:30:57 +0200 Subject: [PATCH 0257/2895] Update openai-functions.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/features/openai-functions.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/content/docs/features/openai-functions.md b/docs/content/docs/features/openai-functions.md index 843524f4..435101ee 100644 --- a/docs/content/docs/features/openai-functions.md +++ b/docs/content/docs/features/openai-functions.md @@ -144,6 +144,15 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso }' ``` +Grammars and function tools can be used as well in conjunction with vision APIs: + +```bash + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "llava", "grammar": "root ::= (\"yes\" | \"no\")", + "messages": [{"role": "user", "content": [{"type":"text", "text": "Is there some grass in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' +``` + + ## 💡 Examples A full e2e example with `docker-compose` is available [here](https://github.com/go-skynet/LocalAI/tree/master/examples/functions). 
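The documentation patches above demonstrate grammar-constrained vision requests against LocalAI's OpenAI-compatible chat completions endpoint using curl. For reference, the same request can be issued from Go with only the standard library; the following is a minimal sketch that assumes a LocalAI instance listening on localhost:8080 with a model named `llava` configured, mirroring the curl examples in the docs — it is illustrative and not part of any patch in this series.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Same payload as the documented curl example: a BNF-style grammar
	// constrains the completion to a bare "yes" or "no" answer.
	payload := map[string]any{
		"model":   "llava",
		"grammar": `root ::= ("yes" | "no")`,
		"messages": []map[string]any{
			{
				"role": "user",
				"content": []map[string]any{
					{"type": "text", "text": "Is there some grass in the image?"},
					{"type": "image_url", "image_url": map[string]string{
						"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
					}},
				},
			},
		},
		"temperature": 0.9,
	}

	body, err := json.Marshal(payload)
	if err != nil {
		panic(err)
	}

	// LocalAI exposes the OpenAI-compatible chat completions endpoint.
	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out)) // assistant content constrained to "yes" or "no"
}
```

Because the backend enforces the grammar at sampling time, the assistant content in the JSON response should already be limited to the tokens the grammar admits, so no client-side post-processing is needed.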
From 24d7dadfed6ddf19e91652c3eb45d04ad1d15584 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Thu, 11 Apr 2024 02:19:24 -0500 Subject: [PATCH 0258/2895] feat: kong cli refactor fixes #1955 (#1974) * feat: migrate to alecthomas/kong for CLI Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: bring in new flag for granular log levels Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * chore: go mod tidy Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: allow loading cli flag values from ["./localai.yaml", "~/.config/localai.yaml", "/etc/localai.yaml"] in that order Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: load from .env file instead of a yaml file Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: better loading for environment files Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat(doc): add initial documentation about configuration Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: remove test log lines Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: integrate new documentation into existing pages Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: add documentation on .env files Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: cleanup some documentation table errors Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: refactor CLI logic out to its own package under core/cli Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --------- Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- .env | 38 +- core/cli/cli.go | 20 + core/cli/models.go | 74 +++ core/cli/run.go | 155 +++++ core/cli/transcript.go | 54 ++ core/cli/tts.go | 61 ++ docs/content/docs/advanced/advanced-usage.md | 101 +++- go.mod | 16 +- go.sum | 61 +- main.go | 595 +++---------------- 10 files changed, 552 insertions(+), 623 deletions(-) create mode 100644 core/cli/cli.go create mode 100644 core/cli/models.go create mode 100644 core/cli/run.go create mode 100644 core/cli/transcript.go create mode 100644 core/cli/tts.go diff --git a/.env b/.env index 82a64e3d..35d4f2d7 100644 --- a/.env +++ b/.env @@ -1,33 +1,33 @@ ## Set number of threads. ## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably. -# THREADS=14 +# LOCALAI_THREADS=14 ## Specify a different bind address (defaults to ":8080") -# ADDRESS=127.0.0.1:8080 +# LOCALAI_ADDRESS=127.0.0.1:8080 ## Default models context size -# CONTEXT_SIZE=512 +# LOCALAI_CONTEXT_SIZE=512 # ## Define galleries. ## models to install will be visible in `/models/available` -# GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}] +# LOCALAI_GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}] ## CORS settings -# CORS=true -# CORS_ALLOW_ORIGINS=* +# LOCALAI_CORS=true +# LOCALAI_CORS_ALLOW_ORIGINS=* ## Default path for models # -# MODELS_PATH=/models +# LOCALAI_MODELS_PATH=/models ## Enable debug mode -# DEBUG=true +# LOCALAI_LOG_LEVEL=debug ## Disables COMPEL (Diffusers) # COMPEL=0 ## Enable/Disable single backend (useful if only one GPU is available) -# SINGLE_ACTIVE_BACKEND=true +# LOCALAI_SINGLE_ACTIVE_BACKEND=true ## Specify a build type. Available: cublas, openblas, clblas.
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit. @@ -46,13 +46,13 @@ # GO_TAGS=stablediffusion ## Path where to store generated images -# IMAGE_PATH=/tmp +# LOCALAI_IMAGE_PATH=/tmp/generated/images ## Specify a default upload limit in MB (whisper) -# UPLOAD_LIMIT +# LOCALAI_UPLOAD_LIMIT=15 ## List of external GRPC backends (note on the container image this variable is already set to use extra backends available in extra/) -# EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py +# LOCALAI_EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py ### Advanced settings ### ### Those are not really used by LocalAI, but from components in the stack ### @@ -72,18 +72,18 @@ # LLAMACPP_PARALLEL=1 ### Enable to run parallel requests -# PARALLEL_REQUESTS=true +# LOCALAI_PARALLEL_REQUESTS=true ### Watchdog settings ### # Enables watchdog to kill backends that are inactive for too much time -# WATCHDOG_IDLE=true -# -# Enables watchdog to kill backends that are busy for too much time -# WATCHDOG_BUSY=true +# LOCALAI_WATCHDOG_IDLE=true # # Time in duration format (e.g. 1h30m) after which a backend is considered idle -# WATCHDOG_IDLE_TIMEOUT=5m +# LOCALAI_WATCHDOG_IDLE_TIMEOUT=5m +# +# Enables watchdog to kill backends that are busy for too much time +# LOCALAI_WATCHDOG_BUSY=true # # Time in duration format (e.g. 1h30m) after which a backend is considered busy -# WATCHDOG_BUSY_TIMEOUT=5m \ No newline at end of file +# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m \ No newline at end of file diff --git a/core/cli/cli.go b/core/cli/cli.go new file mode 100644 index 00000000..5e757f64 --- /dev/null +++ b/core/cli/cli.go @@ -0,0 +1,20 @@ +package cli + +import "embed" + +type Context struct { + Debug bool `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"` + LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug" help:"Set the level of logs to output [${enum}]"` + + // This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI + BackendAssets embed.FS `kong:"-"` +} + +var CLI struct { + Context `embed:""` + + Run RunCMD `cmd:"" help:"Run LocalAI, this is the default command if no other command is specified.
Run 'local-ai run --help' for more information" default:"withargs"` + Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"` + TTS TTSCMD `cmd:"" help:"Convert text to speech"` + Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"` +} diff --git a/core/cli/models.go b/core/cli/models.go new file mode 100644 index 00000000..62ef366b --- /dev/null +++ b/core/cli/models.go @@ -0,0 +1,74 @@ +package cli + +import ( + "encoding/json" + "fmt" + + "github.com/go-skynet/LocalAI/pkg/gallery" + "github.com/rs/zerolog/log" + "github.com/schollz/progressbar/v3" +) + +type ModelsCMDFlags struct { + Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models"` + ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` +} + +type ModelsList struct { + ModelsCMDFlags `embed:""` +} + +type ModelsInstall struct { + ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"` + + ModelsCMDFlags `embed:""` +} + +type ModelsCMD struct { + List ModelsList `cmd:"" help:"List the models available in your galleries" default:"withargs"` + Install ModelsInstall `cmd:"" help:"Install a model from the gallery"` +} + +func (ml *ModelsList) Run(ctx *Context) error { + var galleries []gallery.Gallery + if err := json.Unmarshal([]byte(ml.Galleries), &galleries); err != nil { + log.Error().Err(err).Msg("unable to load galleries") + } + + models, err := gallery.AvailableGalleryModels(galleries, ml.ModelsPath) + if err != nil { + return err + } + for _, model := range models { + if model.Installed { + fmt.Printf(" * %s@%s (installed)\n", model.Gallery.Name, model.Name) + } else { + fmt.Printf(" - %s@%s\n", model.Gallery.Name, model.Name) + } + } + return nil +} + +func (mi *ModelsInstall) Run(ctx *Context) error { + modelName := mi.ModelArgs[0] + + var galleries []gallery.Gallery + if err := json.Unmarshal([]byte(mi.Galleries), &galleries); err != nil { + log.Error().Err(err).Msg("unable to load galleries") + } + + progressBar := progressbar.NewOptions( + 1000, + progressbar.OptionSetDescription(fmt.Sprintf("downloading model %s", modelName)), + progressbar.OptionShowBytes(false), + progressbar.OptionClearOnFinish(), + ) + progressCallback := func(fileName string, current string, total string, percentage float64) { + progressBar.Set(int(percentage * 10)) + } + err := gallery.InstallModelFromGallery(galleries, modelName, mi.ModelsPath, gallery.GalleryModel{}, progressCallback) + if err != nil { + return err + } + return nil +} diff --git a/core/cli/run.go b/core/cli/run.go new file mode 100644 index 00000000..09d09979 --- /dev/null +++ b/core/cli/run.go @@ -0,0 +1,155 @@ +package cli + +import ( + "fmt" + "os" + "strings" + "time" + + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/http" + "github.com/go-skynet/LocalAI/core/startup" + "github.com/rs/zerolog/log" +) + +type RunCMD struct { + ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"` + + ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` + BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` + ImagePath
string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"` + AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. piper)" group:"storage"` + UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"` + ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"` + LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"` + // The alias on this option is there to preserve functionality with the old `--config-file` parameter + ModelsConfigFile string `env:"LOCALAI_MODELS_CONFIG_FILE,CONFIG_FILE" aliases:"config-file" help:"YAML file containing a list of model backend configs" group:"storage"` + + Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models"` + AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"` + RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"` + PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"` + Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"` + PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"` + + F16 bool `name:"f16" env:"LOCALAI_F16,F16" help:"Enable GPU acceleration" group:"performance"` + Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" default:"4" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"` + ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"` + + Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` + CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"` + CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"` + UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"` + APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. 
When this is set, all the requests must be authenticated with one of these API keys" group:"api"` + DisableWelcome bool `env:"LOCALAI_DISABLE_WELCOME,DISABLE_WELCOME" default:"false" help:"Disable welcome pages" group:"api"` + + ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"` + SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"` + PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"` + ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"` + EnableWatchdogIdle bool `env:"LOCALAI_WATCHDOG_IDLE,WATCHDOG_IDLE" default:"false" help:"Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout" group:"backends"` + WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"` + EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"` + WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"` +} + +func (r *RunCMD) Run(ctx *Context) error { + opts := []config.AppOption{ + config.WithConfigFile(r.ModelsConfigFile), + config.WithJSONStringPreload(r.PreloadModels), + config.WithYAMLConfigPreload(r.PreloadModelsConfig), + config.WithModelPath(r.ModelsPath), + config.WithContextSize(r.ContextSize), + config.WithDebug(ctx.Debug), + config.WithImageDir(r.ImagePath), + config.WithAudioDir(r.AudioPath), + config.WithUploadDir(r.UploadPath), + config.WithConfigsDir(r.ConfigPath), + config.WithF16(r.F16), + config.WithStringGalleries(r.Galleries), + config.WithModelLibraryURL(r.RemoteLibrary), + config.WithDisableMessage(false), + config.WithCors(r.CORS), + config.WithCorsAllowOrigins(r.CORSAllowOrigins), + config.WithThreads(r.Threads), + config.WithBackendAssets(ctx.BackendAssets), + config.WithBackendAssetsOutput(r.BackendAssetsPath), + config.WithUploadLimitMB(r.UploadLimit), + config.WithApiKeys(r.APIKeys), + config.WithModelsURL(append(r.Models, r.ModelArgs...)...), + } + + idleWatchDog := r.EnableWatchdogIdle + busyWatchDog := r.EnableWatchdogBusy + + if r.DisableWelcome { + opts = append(opts, config.DisableWelcomePage) + } + + if idleWatchDog || busyWatchDog { + opts = append(opts, config.EnableWatchDog) + if idleWatchDog { + opts = append(opts, config.EnableWatchDogIdleCheck) + dur, err := time.ParseDuration(r.WatchdogIdleTimeout) + if err != nil { + return err + } + opts = append(opts, config.SetWatchDogIdleTimeout(dur)) + } + if busyWatchDog { + opts = append(opts, config.EnableWatchDogBusyCheck) + dur, err := time.ParseDuration(r.WatchdogBusyTimeout) + if err != nil { + return err + } + opts = append(opts, config.SetWatchDogBusyTimeout(dur)) + } + } + if r.ParallelRequests { + opts = append(opts, config.EnableParallelBackendRequests) + } + if r.SingleActiveBackend { + opts = 
append(opts, config.EnableSingleBackend) + } + + // split ":" to get backend name and the uri + for _, v := range r.ExternalGRPCBackends { + backend := v[:strings.IndexByte(v, ':')] + uri := v[strings.IndexByte(v, ':')+1:] + opts = append(opts, config.WithExternalBackend(backend, uri)) + } + + if r.AutoloadGalleries { + opts = append(opts, config.EnableGalleriesAutoload) + } + + if r.PreloadBackendOnly { + _, _, _, err := startup.Startup(opts...) + return err + } + + cl, ml, options, err := startup.Startup(opts...) + + if err != nil { + return fmt.Errorf("failed basic startup tasks with error %s", err.Error()) + } + + // Watch the configuration directory + // If the directory does not exist, we don't watch it + if _, err := os.Stat(r.LocalaiConfigDir); err == nil { + closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, options) + defer closeConfigWatcherFn() + + if err != nil { + return fmt.Errorf("failed while watching configuration directory %s", r.LocalaiConfigDir) + } + } + + appHTTP, err := http.App(cl, ml, options) + if err != nil { + log.Error().Err(err).Msg("error during HTTP App construction") + return err + } + + return appHTTP.Listen(r.Address) +} diff --git a/core/cli/transcript.go b/core/cli/transcript.go new file mode 100644 index 00000000..9f36a77c --- /dev/null +++ b/core/cli/transcript.go @@ -0,0 +1,54 @@ +package cli + +import ( + "context" + "errors" + "fmt" + + "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/pkg/model" +) + +type TranscriptCMD struct { + Filename string `arg:""` + + Backend string `short:"b" default:"whisper" help:"Backend to run the transcription model"` + Model string `short:"m" required:"" help:"Model name to run the transcription"` + Language string `short:"l" help:"Language of the audio file"` + Threads int `short:"t" default:"1" help:"Number of threads used for parallel computation"` + ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` + BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` +} + +func (t *TranscriptCMD) Run(ctx *Context) error { + opts := &config.ApplicationConfig{ + ModelPath: t.ModelsPath, + Context: context.Background(), + AssetsDestination: t.BackendAssetsPath, + } + + cl := config.NewBackendConfigLoader() + ml := model.NewModelLoader(opts.ModelPath) + if err := cl.LoadBackendConfigsFromPath(t.ModelsPath); err != nil { + return err + } + + c, exists := cl.GetBackendConfig(t.Model) + if !exists { + return errors.New("model not found") + } + + c.Threads = &t.Threads + + defer ml.StopAllGRPC() + + tr, err := backend.ModelTranscription(t.Filename, t.Language, ml, c, opts) + if err != nil { + return err + } + for _, segment := range tr.Segments { + fmt.Println(segment.Start.String(), "-", segment.Text) + } + return nil +} diff --git a/core/cli/tts.go b/core/cli/tts.go new file mode 100644 index 00000000..1d8fd3a3 --- /dev/null +++ b/core/cli/tts.go @@ -0,0 +1,61 @@ +package cli + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/pkg/model" +) + +type TTSCMD struct { + Text []string `arg:""` + + Backend string
`short:"b" default:"piper" help:"Backend to run the TTS model"` + Model string `short:"m" required:"" help:"Model name to run the TTS"` + Voice string `short:"v" help:"Voice name to run the TTS"` + OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"` + ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` + BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` +} + +func (t *TTSCMD) Run(ctx *Context) error { + outputFile := t.OutputFile + outputDir := t.BackendAssetsPath + if outputFile != "" { + outputDir = filepath.Dir(outputFile) + } + + text := strings.Join(t.Text, " ") + + opts := &config.ApplicationConfig{ + ModelPath: t.ModelsPath, + Context: context.Background(), + AudioDir: outputDir, + AssetsDestination: t.BackendAssetsPath, + } + ml := model.NewModelLoader(opts.ModelPath) + + defer ml.StopAllGRPC() + + options := config.BackendConfig{} + options.SetDefaults() + + filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, ml, opts, options) + if err != nil { + return err + } + if outputFile != "" { + if err := os.Rename(filePath, outputFile); err != nil { + return err + } + fmt.Printf("Generate file %s\n", outputFile) + } else { + fmt.Printf("Generate file %s\n", filePath) + } + return nil +} diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md index c9926bab..dace5803 100644 --- a/docs/content/docs/advanced/advanced-usage.md +++ b/docs/content/docs/advanced/advanced-usage.md @@ -382,35 +382,84 @@ docker run --env-file .env localai ### CLI parameters -You can control LocalAI with command line arguments, to specify a binding address, or the number of threads. +You can control LocalAI with command line arguments, to specify a binding address, or the number of threads. Any command line parameter can be specified via an environment variable. +#### Global Flags +| Parameter | Default | Description | Environment Variable | +|-----------|---------|-------------|----------------------| +| -h, --help | | Show context-sensitive help. | +| --log-level | info | Set the level of logs to output [error,warn,info,debug] | $LOCALAI_LOG_LEVEL | -| Parameter | Environmental Variable | Default Variable | Description | -| ------------------------------ | ------------------------------- | -------------------------------------------------- | ------------------------------------------------------------------- | -| --f16 | $F16 | false | Enable f16 mode | -| --debug | $DEBUG | false | Enable debug mode | -| --cors | $CORS | false | Enable CORS support | -| --cors-allow-origins value | $CORS_ALLOW_ORIGINS | | Specify origins allowed for CORS | -| --threads value | $THREADS | 4 | Number of threads to use for parallel computation | -| --models-path value | $MODELS_PATH | ./models | Path to the directory containing models used for inferencing | -| --preload-models value | $PRELOAD_MODELS | | List of models to preload in JSON format at startup | -| --preload-models-config value | $PRELOAD_MODELS_CONFIG | | A config with a list of models to apply at startup. 
Specify the path to a YAML config file | -| --config-file value | $CONFIG_FILE | | Path to the config file | -| --address value | $ADDRESS | :8080 | Specify the bind address for the API server | -| --image-path value | $IMAGE_PATH | | Path to the directory used to store generated images | -| --context-size value | $CONTEXT_SIZE | 512 | Default context size of the model | -| --upload-limit value | $UPLOAD_LIMIT | 15 | Default upload limit in megabytes (audio file upload) | -| --galleries | $GALLERIES | | Allows to set galleries from command line | -|--parallel-requests | $PARALLEL_REQUESTS | false | Enable backends to handle multiple requests in parallel. This is for backends that supports multiple requests in parallel, like llama.cpp or vllm | -| --single-active-backend | $SINGLE_ACTIVE_BACKEND | false | Allow only one backend to be running | -| --api-keys value | $API_KEY | empty | List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys. -| --enable-watchdog-idle | $WATCHDOG_IDLE | false | Enable watchdog for stopping idle backends. This will stop the backends if are in idle state for too long. (default: false) [$WATCHDOG_IDLE] -| --enable-watchdog-busy | $WATCHDOG_BUSY | false | Enable watchdog for stopping busy backends that exceed a defined threshold.| -| --watchdog-busy-timeout value | $WATCHDOG_BUSY_TIMEOUT | 5m | Watchdog timeout. This will restart the backend if it crashes. | -| --watchdog-idle-timeout value | $WATCHDOG_IDLE_TIMEOUT | 15m | Watchdog idle timeout. This will restart the backend if it crashes. | -| --preload-backend-only | $PRELOAD_BACKEND_ONLY | false | If set, the api is NOT launched, and only the preloaded models / backends are started. This is intended for multi-node setups. | -| --external-grpc-backends | EXTERNAL_GRPC_BACKENDS | none | Comma separated list of external gRPC backends to use. Format: `name:host:port` or `name:/path/to/file` | +#### Storage Flags +| Parameter | Default | Description | Environment Variable | +|-----------|---------|-------------|----------------------| +| --models-path | /home/cryptk/Documents/sourcecode/LocalAI/models | Path containing models used for inferencing | $LOCALAI_MODELS_PATH | +| --backend-assets-path |/tmp/localai/backend_data | Path used to extract libraries that are required by some of the backends in runtime | $LOCALAI_BACKEND_ASSETS_PATH | +| --image-path | /tmp/generated/images | Location for images generated by backends (e.g. stablediffusion) | $LOCALAI_IMAGE_PATH | +| --audio-path | /tmp/generated/audio | Location for audio generated by backends (e.g. 
piper) | $LOCALAI_AUDIO_PATH | +| --upload-path | /tmp/localai/upload | Path to store uploads from files api | $LOCALAI_UPLOAD_PATH | +| --config-path | /tmp/localai/config | | $LOCALAI_CONFIG_PATH | +| --localai-config-dir | /home/cryptk/Documents/sourcecode/LocalAI/configuration | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | $LOCALAI_CONFIG_DIR | +| --models-config-file | STRING | YAML file containing a list of model backend configs | $LOCALAI_MODELS_CONFIG_FILE | +#### Models Flags +| Parameter | Default | Description | Environment Variable | +|-----------|---------|-------------|----------------------| +| --galleries | STRING | JSON list of galleries | $LOCALAI_GALLERIES | +| --autoload-galleries | | | $LOCALAI_AUTOLOAD_GALLERIES | +| --remote-library | "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml" | A LocalAI remote library URL | $LOCALAI_REMOTE_LIBRARY | +| --preload-models | STRING | A List of models to apply in JSON at start |$LOCALAI_PRELOAD_MODELS | +| --models | MODELS,... | A List of model configuration URLs to load | $LOCALAI_MODELS | +| --preload-models-config | STRING | A List of models to apply at startup. Path to a YAML config file | $LOCALAI_PRELOAD_MODELS_CONFIG | + +#### Performance Flags +| Parameter | Default | Description | Environment Variable | +|-----------|---------|-------------|----------------------| +| --f16 | | Enable GPU acceleration | $LOCALAI_F16 | +| -t, --threads | 4 | Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested | $LOCALAI_THREADS | +| --context-size | 512 | Default context size for models | $LOCALAI_CONTEXT_SIZE | + +#### API Flags +| Parameter | Default | Description | Environment Variable | +|-----------|---------|-------------|----------------------| +| --address | ":8080" | Bind address for the API server | $LOCALAI_ADDRESS | +| --cors | | | $LOCALAI_CORS | +| --cors-allow-origins | | | $LOCALAI_CORS_ALLOW_ORIGINS | +| --upload-limit | 15 | Default upload-limit in MB | $LOCALAI_UPLOAD_LIMIT | +| --api-keys | API-KEYS,... | List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys | $LOCALAI_API_KEY | +| --disable-welcome | | Disable welcome pages | $LOCALAI_DISABLE_WELCOME | + +#### Backend Flags +| Parameter | Default | Description | Environment Variable | +|-----------|---------|-------------|----------------------| +| --parallel-requests | | Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm) | $LOCALAI_PARALLEL_REQUESTS | +| --single-active-backend | | Allow only one backend to be run at a time | $LOCALAI_SINGLE_ACTIVE_BACKEND | +| --preload-backend-only | | Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups) | $LOCALAI_PRELOAD_BACKEND_ONLY | +| --external-grpc-backends | EXTERNAL-GRPC-BACKENDS,... 
| A list of external grpc backends | $LOCALAI_EXTERNAL_GRPC_BACKENDS | +| --enable-watchdog-idle | | Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout | $LOCALAI_WATCHDOG_IDLE | +| --watchdog-idle-timeout | 15m | Threshold beyond which an idle backend should be stopped | $LOCALAI_WATCHDOG_IDLE_TIMEOUT, $WATCHDOG_IDLE_TIMEOUT | +| --enable-watchdog-busy | | Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout | $LOCALAI_WATCHDOG_BUSY | +| --watchdog-busy-timeout | 5m | Threshold beyond which a busy backend should be stopped | $LOCALAI_WATCHDOG_BUSY_TIMEOUT | + +### .env files + +Any settings being provided by an Environment Variable can also be provided from within .env files. There are several locations that will be checked for relevant .env files. In order of precedence they are: + +- .env within the current directory +- localai.env within the current directory +- localai.env within the home directory +- .config/localai.env within the home directory +- /etc/localai.env + +Environment variables within files earlier in the list will take precedence over environment variables defined in files later in the list. + +An example .env file is: + +``` +LOCALAI_THREADS=10 +LOCALAI_MODELS_PATH=/mnt/storage/localai/models +LOCALAI_F16=true +``` ### Extra backends diff --git a/go.mod b/go.mod index 4dd207c7..fac7acfd 100644 --- a/go.mod +++ b/go.mod @@ -13,8 +13,8 @@ require ( github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1 github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 github.com/gofiber/fiber/v2 v2.52.0 + github.com/gofiber/swagger v1.0.0 github.com/gofiber/template/html/v2 v2.1.1 - github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47 github.com/google/uuid v1.5.0 github.com/hashicorp/go-multierror v1.1.1 github.com/hpcloud/tail v1.0.0 @@ -30,11 +30,12 @@ require ( github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 github.com/prometheus/client_golang v1.17.0 github.com/rs/zerolog v1.31.0 + github.com/russross/blackfriday v1.6.0 github.com/sashabaranov/go-openai v1.20.4 github.com/schollz/progressbar/v3 v3.13.1 github.com/stretchr/testify v1.9.0 + github.com/swaggo/swag v1.16.3 github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701 - github.com/urfave/cli/v2 v2.27.1 github.com/valyala/fasthttp v1.51.0 go.opentelemetry.io/otel v1.19.0 go.opentelemetry.io/otel/exporters/prometheus v0.42.0 @@ -64,8 +65,6 @@ require ( github.com/Masterminds/semver/v3 v3.2.0 // indirect github.com/Microsoft/go-winio v0.6.0 // indirect github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect - github.com/PuerkitoBio/purell v1.2.1 // indirect - github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect github.com/alecthomas/chroma v0.10.0 // indirect github.com/aymanbagabas/go-osc52 v1.0.3 // indirect github.com/aymerick/douceur v0.2.0 // indirect @@ -85,7 +84,6 @@ require ( github.com/go-openapi/jsonreference v0.21.0 // indirect github.com/go-openapi/spec v0.21.0 // indirect github.com/go-openapi/swag v0.23.0 // indirect - github.com/gofiber/swagger v1.0.0 // indirect github.com/gofiber/template v1.8.3 // indirect github.com/gofiber/utils v1.1.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect @@ -119,12 +117,10 @@ require ( github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect github.com/prometheus/common v0.44.0 // indirect github.com/prometheus/procfs v0.11.1 // indirect - github.com/russross/blackfriday 
v1.6.0 // indirect github.com/shopspring/decimal v1.2.0 // indirect github.com/sirupsen/logrus v1.8.1 // indirect github.com/spf13/cast v1.3.1 // indirect github.com/swaggo/files/v2 v2.0.0 // indirect - github.com/swaggo/swag v1.16.3 // indirect github.com/ulikunitz/xz v0.5.9 // indirect github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect @@ -140,12 +136,11 @@ require ( google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect gopkg.in/fsnotify.v1 v1.4.7 // indirect gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect - sigs.k8s.io/yaml v1.4.0 // indirect ) require ( + github.com/alecthomas/kong v0.9.0 github.com/andybalholm/brotli v1.0.5 // indirect - github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect github.com/go-audio/audio v1.0.0 // indirect github.com/go-audio/riff v1.0.0 // indirect github.com/go-logr/logr v1.2.4 // indirect @@ -153,16 +148,15 @@ require ( github.com/google/go-cmp v0.6.0 // indirect github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect github.com/hashicorp/errwrap v1.0.0 // indirect + github.com/joho/godotenv v1.5.1 github.com/klauspost/compress v1.17.0 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-runewidth v0.0.15 // indirect github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 github.com/rivo/uniseg v0.2.0 // indirect - github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/tcplisten v1.0.0 // indirect - github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 // indirect golang.org/x/net v0.22.0 // indirect golang.org/x/sys v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect diff --git a/go.sum b/go.sum index f81f10c8..dc08c465 100644 --- a/go.sum +++ b/go.sum @@ -13,12 +13,14 @@ github.com/Microsoft/go-winio v0.6.0 h1:slsWYD/zyx7lCXoZVlvQrj0hPTM1HI4+v1sIda2y github.com/Microsoft/go-winio v0.6.0/go.mod h1:cTAf44im0RAYeL23bpB+fzCyDH2MJiz2BO69KH/soAE= github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEVMRuU21PR1EtLVZJmdB18Gu3Rw= github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk= -github.com/PuerkitoBio/purell v1.2.1 h1:QsZ4TjvwiMpat6gBCBxEQI0rcS9ehtkKtSpiUnd9N28= -github.com/PuerkitoBio/purell v1.2.1/go.mod h1:ZwHcC/82TOaovDi//J/804umJFFmbOHPngi8iYYv/Eo= -github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M= -github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= +github.com/alecthomas/assert/v2 v2.6.0 h1:o3WJwILtexrEUk3cUVal3oiQY2tfgr/FHWiz/v2n4FU= +github.com/alecthomas/assert/v2 v2.6.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek= github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s= +github.com/alecthomas/kong v0.9.0 h1:G5diXxc85KvoV2f0ZRVuMsi45IrBgx9zDNGNj165aPA= +github.com/alecthomas/kong v0.9.0/go.mod h1:Y47y5gKfHp1hDc7CH7OeXgLIpp+Q2m1Ni0L5s3bI8Os= +github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc= +github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= github.com/andybalholm/brotli v1.0.1/go.mod 
h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= @@ -45,10 +47,6 @@ github.com/containerd/continuity v0.3.0/go.mod h1:wJEAIwKOm/pBZuBd0JmeTvnLquTB1A github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= -github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4= -github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= @@ -95,8 +93,6 @@ github.com/go-openapi/spec v0.21.0 h1:LTVzPc3p/RzRnkQqLRndbAzjY0d0BCL72A6j3CdL9Z github.com/go-openapi/spec v0.21.0/go.mod h1:78u6VdPw81XU44qEWGhtr982gJ5BWg2c0I5XwVMotYk= github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= -github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 h1:WYjkXL0Nw7dN2uDBMVCWQ8xLavrIhjF/DLczuh5L9TY= -github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo= github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI= github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= @@ -104,8 +100,6 @@ github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEe github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gofiber/fiber/v2 v2.50.0 h1:ia0JaB+uw3GpNSCR5nvC5dsaxXjRU5OEu36aytx+zGw= -github.com/gofiber/fiber/v2 v2.50.0/go.mod h1:21eytvay9Is7S6z+OgPi7c7n4++tnClWmhpimVHMimw= github.com/gofiber/fiber/v2 v2.52.0 h1:S+qXi7y+/Pgvqq4DrSmREGiFwtB7Bu6+QFLuIHYw/UE= github.com/gofiber/fiber/v2 v2.52.0/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ= github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc= @@ -131,8 +125,6 @@ github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/snappy v0.0.2 h1:aeE13tS0IiQgFjYdoL8qN3K1N2bXXtI6Vi51/y7BpMw= github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47 h1:k4Tw0nt6lwro3Uin8eqoET7MDA4JnT8YgbCjc/g5E3k= -github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47/go.mod 
h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= @@ -147,8 +139,6 @@ github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLe github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= -github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY= @@ -157,6 +147,8 @@ github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/U github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= +github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= +github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4= @@ -165,6 +157,8 @@ github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1: github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= +github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= +github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw= @@ -172,8 +166,6 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= -github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= -github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/compress v1.17.0 h1:Rnbp4K9EjcDuVuHtd0dgA4qNuv9yKDYKK1ulpJwgrqM= github.com/klauspost/compress v1.17.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/cpuid v1.2.0/go.mod 
h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= @@ -198,7 +190,6 @@ github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxec github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= @@ -296,8 +287,6 @@ github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWR github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww= github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= -github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sashabaranov/go-openai v1.20.4 h1:095xQ/fAtRa0+Rj21sezVJABgKfGPNbyx/sAN/hJUmg= github.com/sashabaranov/go-openai v1.20.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/schollz/progressbar/v3 v3.13.1 h1:o8rySDYiQ59Mwzy2FELeHY5ZARXZTVJC7iHD6PEFUiE= @@ -328,7 +317,6 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= @@ -349,14 +337,8 @@ github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oW github.com/ulikunitz/xz v0.5.9 h1:RsKRIA2MO8x56wkkcd3LbtcE/uMszhb6DpRf+3uwa3I= github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= -github.com/urfave/cli/v2 v2.25.7 h1:VAzn5oq403l5pHjc4OhD54+XGO9cdKVL/7lDjF+iKUs= -github.com/urfave/cli/v2 v2.25.7/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= -github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho= -github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= -github.com/valyala/fasthttp v1.50.0 h1:H7fweIlBm0rXLs2q0XbalvJ6r0CUPFWK3/bB4N13e9M= -github.com/valyala/fasthttp v1.50.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA= github.com/valyala/fasthttp v1.51.0 h1:8b30A5JlZ6C7AS81RsWjYMQmrZG6feChmgAolCl1SqA= github.com/valyala/fasthttp v1.51.0/go.mod 
h1:oI2XroL+lI7vdXyYoQk03bXBThfFl2cVdIA3Xl7cH8g= github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8= @@ -371,10 +353,6 @@ github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17 github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos= -github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= -github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= -github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 h1:+qGGcbkzsfDQNPPe9UDgpxAWQrhbbBXOYJFQDq/dtJw= -github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913/go.mod h1:4aEEwZQutDLsQv2Deui4iYQ6DWTxR14g6m8Wv88+Xqk= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= @@ -401,15 +379,11 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= -golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc= -golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= -golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.16.0 h1:QX4fJ0Rr5cPQCF7O9lh9Se4pmwfwskqZfq5moyldzic= golang.org/x/mod v0.16.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -425,8 +399,6 @@ golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.0.0-20221002022538-bcab6841153b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= -golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= -golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc= golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -435,9 +407,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod 
h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= -golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -471,16 +442,12 @@ golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= -golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= -golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= -golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -488,8 +455,6 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= -golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -499,8 +464,6 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= 
-golang.org/x/tools v0.12.0 h1:YW6HUoUmYBpwSgyaGaZq1fHjrBjX1rlpZ54T6mu2kss=
-golang.org/x/tools v0.12.0/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM=
 golang.org/x/tools v0.19.0 h1:tfGCXNR1OsFG+sVdLAitlpjAvD/I6dHDKnYrpEZUHkw=
 golang.org/x/tools v0.19.0/go.mod h1:qoJWxmGSIBmAeriMx19ogtrEPrGtDbPK634QFIcLAhc=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -541,5 +504,3 @@ gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk=
 gotest.tools/v3 v3.3.0 h1:MfDY1b1/0xN1CyMlQDac0ziEy9zJQd9CXBRRDHw2jJo=
 gotest.tools/v3 v3.3.0/go.mod h1:Mcr9QNxkg0uMvy/YElmo4SpXgJKWgQvYrT7Kw5RzJ1A=
-sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
-sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
diff --git a/main.go b/main.go
index 53966ba5..8b5696d1 100644
--- a/main.go
+++ b/main.go
@@ -1,41 +1,30 @@
 package main

 import (
-	"context"
-	"encoding/json"
-	"errors"
-	"fmt"
 	"os"
 	"os/signal"
 	"path/filepath"
-	"strings"
 	"syscall"
-	"time"
-
-	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
-
-	"github.com/go-skynet/LocalAI/core/http"
-	"github.com/go-skynet/LocalAI/core/startup"
+	"github.com/alecthomas/kong"
+	"github.com/go-skynet/LocalAI/core/cli"
 	"github.com/go-skynet/LocalAI/internal"
-	"github.com/go-skynet/LocalAI/pkg/gallery"
-	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/joho/godotenv"
+	"github.com/rs/zerolog"
 	"github.com/rs/zerolog/log"
-	progressbar "github.com/schollz/progressbar/v3"
-	"github.com/urfave/cli/v2"

 	_ "github.com/go-skynet/LocalAI/swagger"
 )

-const (
-	remoteLibraryURL = "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml"
-)
-
 func main() {
+	var err error
+
+	// Initialize zerolog at INFO level; we will set the desired level after we parse the CLI options
 	log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
-	// clean up process
+	zerolog.SetGlobalLevel(zerolog.InfoLevel)
+
+	// Catch signals from the OS requesting us to exit
 	go func() {
 		c := make(chan os.Signal, 1) // we need to reserve to buffer size 1, so the notifier are not blocked
 		signal.Notify(c, os.Interrupt, syscall.SIGTERM)
@@ -43,511 +32,83 @@ func main() {
 		os.Exit(1)
 	}()

-	path, err := os.Getwd()
-	if err != nil {
-		log.Error().Err(err).Msg("failed to get current directory")
-		os.Exit(1)
+	// handle loading environment variables from .env files
+	envFiles := []string{".env", "localai.env"}
+	homeDir, err := os.UserHomeDir()
+	if err == nil {
+		envFiles = append(envFiles, filepath.Join(homeDir, "localai.env"), filepath.Join(homeDir, ".config/localai.env"))
+	}
+	envFiles = append(envFiles, "/etc/localai.env")
+
+	for _, envFile := range envFiles {
+		if _, err := os.Stat(envFile); err == nil {
+			log.Info().Str("envFile", envFile).Msg("loading environment variables from file")
+			godotenv.Load(envFile)
+		}
 	}

-	app := &cli.App{
-		Name:    "LocalAI",
-		Version: internal.PrintableVersion(),
-		Usage:   "OpenAI, OSS alternative. Drop-in compatible API for running LLM, GPT and genAI models locally on CPU, GPUs with consumer grade hardware.
Supported server endpoints: OpenAI, Elevenlabs", - Flags: []cli.Flag{ - &cli.BoolFlag{ - Name: "f16", - EnvVars: []string{"F16"}, - }, - &cli.BoolFlag{ - Name: "autoload-galleries", - EnvVars: []string{"AUTOLOAD_GALLERIES"}, - }, - &cli.BoolFlag{ - Name: "debug", - EnvVars: []string{"DEBUG"}, - }, - &cli.BoolFlag{ - Name: "single-active-backend", - EnvVars: []string{"SINGLE_ACTIVE_BACKEND"}, - Usage: "Allow only one backend to be running.", - }, - &cli.BoolFlag{ - Name: "parallel-requests", - EnvVars: []string{"PARALLEL_REQUESTS"}, - Usage: "Enable backends to handle multiple requests in parallel. This is for backends that supports multiple requests in parallel, like llama.cpp or vllm", - }, - &cli.BoolFlag{ - Name: "cors", - EnvVars: []string{"CORS"}, - }, - &cli.StringFlag{ - Name: "cors-allow-origins", - EnvVars: []string{"CORS_ALLOW_ORIGINS"}, - }, - &cli.IntFlag{ - Name: "threads", - Usage: "Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested.", - EnvVars: []string{"THREADS"}, - Value: 4, - }, - &cli.StringFlag{ - Name: "models-path", - Usage: "Path containing models used for inferencing", - EnvVars: []string{"MODELS_PATH"}, - Value: filepath.Join(path, "models"), - }, - &cli.StringFlag{ - Name: "galleries", - Usage: "JSON list of galleries", - EnvVars: []string{"GALLERIES"}, - }, - &cli.StringFlag{ - Name: "remote-library", - Usage: "A LocalAI remote library URL", - EnvVars: []string{"REMOTE_LIBRARY"}, - Value: remoteLibraryURL, - }, - &cli.StringFlag{ - Name: "preload-models", - Usage: "A List of models to apply in JSON at start", - EnvVars: []string{"PRELOAD_MODELS"}, - }, - &cli.StringSliceFlag{ - Name: "models", - Usage: "A List of models URLs configurations.", - EnvVars: []string{"MODELS"}, - }, - &cli.StringFlag{ - Name: "preload-models-config", - Usage: "A List of models to apply at startup. Path to a YAML config file", - EnvVars: []string{"PRELOAD_MODELS_CONFIG"}, - }, - &cli.StringFlag{ - Name: "config-file", - Usage: "Config file", - EnvVars: []string{"CONFIG_FILE"}, - }, - &cli.StringFlag{ - Name: "address", - Usage: "Bind address for the API server.", - EnvVars: []string{"ADDRESS"}, - Value: ":8080", - }, - &cli.StringFlag{ - Name: "image-path", - Usage: "Image directory", - EnvVars: []string{"IMAGE_PATH"}, - Value: "/tmp/generated/images", - }, - &cli.StringFlag{ - Name: "audio-path", - Usage: "audio directory", - EnvVars: []string{"AUDIO_PATH"}, - Value: "/tmp/generated/audio", - }, - &cli.StringFlag{ - Name: "upload-path", - Usage: "Path to store uploads from files api", - EnvVars: []string{"UPLOAD_PATH"}, - Value: "/tmp/localai/upload", - }, - &cli.StringFlag{ - Name: "config-path", - Usage: "Path to store uploads from files api", - EnvVars: []string{"CONFIG_PATH"}, - Value: "/tmp/localai/config", - }, - &cli.StringFlag{ - Name: "backend-assets-path", - Usage: "Path used to extract libraries that are required by some of the backends in runtime.", - EnvVars: []string{"BACKEND_ASSETS_PATH"}, - Value: "/tmp/localai/backend_data", - }, - &cli.StringSliceFlag{ - Name: "external-grpc-backends", - Usage: "A list of external grpc backends", - EnvVars: []string{"EXTERNAL_GRPC_BACKENDS"}, - }, - &cli.IntFlag{ - Name: "context-size", - Usage: "Default context size of the model", - EnvVars: []string{"CONTEXT_SIZE"}, - Value: 512, - }, - &cli.IntFlag{ - Name: "upload-limit", - Usage: "Default upload-limit. 
MB", - EnvVars: []string{"UPLOAD_LIMIT"}, - Value: 15, - }, - &cli.StringSliceFlag{ - Name: "api-keys", - Usage: "List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys.", - EnvVars: []string{"API_KEY"}, - }, - &cli.BoolFlag{ - Name: "enable-watchdog-idle", - Usage: "Enable watchdog for stopping idle backends. This will stop the backends if are in idle state for too long.", - EnvVars: []string{"WATCHDOG_IDLE"}, - Value: false, - }, - &cli.BoolFlag{ - Name: "disable-welcome", - Usage: "Disable welcome pages", - EnvVars: []string{"DISABLE_WELCOME"}, - Value: false, - }, - &cli.BoolFlag{ - Name: "enable-watchdog-busy", - Usage: "Enable watchdog for stopping busy backends that exceed a defined threshold.", - EnvVars: []string{"WATCHDOG_BUSY"}, - Value: false, - }, - &cli.StringFlag{ - Name: "watchdog-busy-timeout", - Usage: "Watchdog timeout. This will restart the backend if it crashes.", - EnvVars: []string{"WATCHDOG_BUSY_TIMEOUT"}, - Value: "5m", - }, - &cli.StringFlag{ - Name: "watchdog-idle-timeout", - Usage: "Watchdog idle timeout. This will restart the backend if it crashes.", - EnvVars: []string{"WATCHDOG_IDLE_TIMEOUT"}, - Value: "15m", - }, - &cli.BoolFlag{ - Name: "preload-backend-only", - Usage: "If set, the api is NOT launched, and only the preloaded models / backends are started. This is intended for multi-node setups.", - EnvVars: []string{"PRELOAD_BACKEND_ONLY"}, - Value: false, - }, - &cli.StringFlag{ - Name: "localai-config-dir", - Usage: "Directory to use for the configuration files of LocalAI itself. This is NOT where model files should be placed.", - EnvVars: []string{"LOCALAI_CONFIG_DIR"}, - Value: "./configuration", - }, - }, - Description: ` -LocalAI is a drop-in replacement OpenAI API which runs inference locally. + // Actually parse the CLI options + ctx := kong.Parse(&cli.CLI, + kong.Description( + ` LocalAI is a drop-in replacement OpenAI API for running LLM, GPT and genAI models locally on CPU, GPUs with consumer grade hardware. 
Some of the models compatible are: -- Vicuna -- Koala -- GPT4ALL -- GPT4ALL-J -- Cerebras -- Alpaca -- StableLM (ggml quantized) + - Vicuna + - Koala + - GPT4ALL + - GPT4ALL-J + - Cerebras + - Alpaca + - StableLM (ggml quantized) -For a list of compatible model, check out: https://localai.io/model-compatibility/index.html +For a list of compatible models, check out: https://localai.io/model-compatibility/index.html + +Copyright: Ettore Di Giacinto + +Version: ${version} `, - UsageText: `local-ai [options]`, - Copyright: "Ettore Di Giacinto", - Action: func(ctx *cli.Context) error { - opts := []config.AppOption{ - config.WithConfigFile(ctx.String("config-file")), - config.WithJSONStringPreload(ctx.String("preload-models")), - config.WithYAMLConfigPreload(ctx.String("preload-models-config")), - config.WithModelPath(ctx.String("models-path")), - config.WithContextSize(ctx.Int("context-size")), - config.WithDebug(ctx.Bool("debug")), - config.WithImageDir(ctx.String("image-path")), - config.WithAudioDir(ctx.String("audio-path")), - config.WithUploadDir(ctx.String("upload-path")), - config.WithConfigsDir(ctx.String("config-path")), - config.WithF16(ctx.Bool("f16")), - config.WithStringGalleries(ctx.String("galleries")), - config.WithModelLibraryURL(ctx.String("remote-library")), - config.WithDisableMessage(false), - config.WithCors(ctx.Bool("cors")), - config.WithCorsAllowOrigins(ctx.String("cors-allow-origins")), - config.WithThreads(ctx.Int("threads")), - config.WithBackendAssets(backendAssets), - config.WithBackendAssetsOutput(ctx.String("backend-assets-path")), - config.WithUploadLimitMB(ctx.Int("upload-limit")), - config.WithApiKeys(ctx.StringSlice("api-keys")), - config.WithModelsURL(append(ctx.StringSlice("models"), ctx.Args().Slice()...)...), - } - - idleWatchDog := ctx.Bool("enable-watchdog-idle") - busyWatchDog := ctx.Bool("enable-watchdog-busy") - - if ctx.Bool("disable-welcome") { - opts = append(opts, config.DisableWelcomePage) - } - - if idleWatchDog || busyWatchDog { - opts = append(opts, config.EnableWatchDog) - if idleWatchDog { - opts = append(opts, config.EnableWatchDogIdleCheck) - dur, err := time.ParseDuration(ctx.String("watchdog-idle-timeout")) - if err != nil { - return err - } - opts = append(opts, config.SetWatchDogIdleTimeout(dur)) - } - if busyWatchDog { - opts = append(opts, config.EnableWatchDogBusyCheck) - dur, err := time.ParseDuration(ctx.String("watchdog-busy-timeout")) - if err != nil { - return err - } - opts = append(opts, config.SetWatchDogBusyTimeout(dur)) - } - } - if ctx.Bool("parallel-requests") { - opts = append(opts, config.EnableParallelBackendRequests) - } - if ctx.Bool("single-active-backend") { - opts = append(opts, config.EnableSingleBackend) - } - - externalgRPC := ctx.StringSlice("external-grpc-backends") - // split ":" to get backend name and the uri - for _, v := range externalgRPC { - backend := v[:strings.IndexByte(v, ':')] - uri := v[strings.IndexByte(v, ':')+1:] - opts = append(opts, config.WithExternalBackend(backend, uri)) - } - - if ctx.Bool("autoload-galleries") { - opts = append(opts, config.EnableGalleriesAutoload) - } - - if ctx.Bool("preload-backend-only") { - _, _, _, err := startup.Startup(opts...) - return err - } - - cl, ml, options, err := startup.Startup(opts...) 
- - if err != nil { - return fmt.Errorf("failed basic startup tasks with error %s", err.Error()) - } - - configdir := ctx.String("localai-config-dir") - // Watch the configuration directory - // If the directory does not exist, we don't watch it - if _, err := os.Stat(configdir); err == nil { - closeConfigWatcherFn, err := startup.WatchConfigDirectory(ctx.String("localai-config-dir"), options) - defer closeConfigWatcherFn() - - if err != nil { - return fmt.Errorf("failed while watching configuration directory %s", ctx.String("localai-config-dir")) - } - } - - appHTTP, err := http.App(cl, ml, options) - if err != nil { - log.Error().Err(err).Msg("error during HTTP App construction") - return err - } - - return appHTTP.Listen(ctx.String("address")) + ), + kong.UsageOnError(), + kong.Vars{ + "basepath": kong.ExpandPath("."), + "remoteLibraryURL": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml", + "version": internal.PrintableVersion(), }, - Commands: []*cli.Command{ - { - Name: "models", - Usage: "List or install models", - Subcommands: []*cli.Command{ - { - Name: "list", - Usage: "List the models avaiable in your galleries", - Action: func(ctx *cli.Context) error { - var galleries []gallery.Gallery - if err := json.Unmarshal([]byte(ctx.String("galleries")), &galleries); err != nil { - log.Error().Err(err).Msg("unable to load galleries") - } + ) - models, err := gallery.AvailableGalleryModels(galleries, ctx.String("models-path")) - if err != nil { - return err - } - for _, model := range models { - if model.Installed { - fmt.Printf(" * %s@%s (installed)\n", model.Gallery.Name, model.Name) - } else { - fmt.Printf(" - %s@%s\n", model.Gallery.Name, model.Name) - } - } - return nil - }, - }, - { - Name: "install", - Usage: "Install a model from the gallery", - Action: func(ctx *cli.Context) error { - modelName := ctx.Args().First() - - var galleries []gallery.Gallery - if err := json.Unmarshal([]byte(ctx.String("galleries")), &galleries); err != nil { - log.Error().Err(err).Msg("unable to load galleries") - } - - progressBar := progressbar.NewOptions( - 1000, - progressbar.OptionSetDescription(fmt.Sprintf("downloading model %s", modelName)), - progressbar.OptionShowBytes(false), - progressbar.OptionClearOnFinish(), - ) - progressCallback := func(fileName string, current string, total string, percentage float64) { - progressBar.Set(int(percentage * 10)) - } - err = gallery.InstallModelFromGallery(galleries, modelName, ctx.String("models-path"), gallery.GalleryModel{}, progressCallback) - if err != nil { - return err - } - return nil - }, - }, - }, - }, - { - Name: "tts", - Usage: "Convert text to speech", - Flags: []cli.Flag{ - &cli.StringFlag{ - Name: "backend", - Value: "piper", - Aliases: []string{"b"}, - Usage: "Backend to run the TTS model", - }, - &cli.StringFlag{ - Name: "model", - Aliases: []string{"m"}, - Usage: "Model name to run the TTS", - Required: true, - }, - &cli.StringFlag{ - Name: "voice", - Aliases: []string{"v"}, - Usage: "Voice name to run the TTS (optional)", - Required: true, - }, - &cli.StringFlag{ - Name: "output-file", - Aliases: []string{"o"}, - Usage: "The path to write the output wav file", - }, - }, - Action: func(ctx *cli.Context) error { - modelOption := ctx.String("model") - if modelOption == "" { - return errors.New("--model parameter is required") - } - backendOption := ctx.String("backend") - if backendOption == "" { - backendOption = "piper" - } - outputFile := ctx.String("output-file") - outputDir := 
ctx.String("backend-assets-path") - if outputFile != "" { - outputDir = filepath.Dir(outputFile) - } - - text := strings.Join(ctx.Args().Slice(), " ") - - opts := &config.ApplicationConfig{ - ModelPath: ctx.String("models-path"), - Context: context.Background(), - AudioDir: outputDir, - AssetsDestination: ctx.String("backend-assets-path"), - } - ml := model.NewModelLoader(opts.ModelPath) - - defer ml.StopAllGRPC() - - filePath, _, err := backend.ModelTTS(backendOption, text, modelOption, ctx.String("voice"), ml, opts, config.BackendConfig{}) - if err != nil { - return err - } - if outputFile != "" { - if err := os.Rename(filePath, outputFile); err != nil { - return err - } - fmt.Printf("Generate file %s\n", outputFile) - } else { - fmt.Printf("Generate file %s\n", filePath) - } - return nil - }, - }, - { - Name: "transcript", - Usage: "Convert audio to text", - Flags: []cli.Flag{ - &cli.StringFlag{ - Name: "backend", - Value: "whisper", - Aliases: []string{"b"}, - Usage: "Backend to run the transcription model", - }, - &cli.StringFlag{ - Name: "model", - Aliases: []string{"m"}, - Usage: "Model name to run the transcription", - }, - &cli.StringFlag{ - Name: "language", - Aliases: []string{"l"}, - Usage: "Language of the audio file", - }, - &cli.IntFlag{ - Name: "threads", - Aliases: []string{"t"}, - Usage: "Threads to use", - Value: 1, - }, - &cli.StringFlag{ - Name: "output-file", - Aliases: []string{"o"}, - Usage: "The path to write the output wav file", - }, - }, - Action: func(ctx *cli.Context) error { - modelOption := ctx.String("model") - filename := ctx.Args().First() - language := ctx.String("language") - threads := ctx.Int("threads") - - opts := &config.ApplicationConfig{ - ModelPath: ctx.String("models-path"), - Context: context.Background(), - AssetsDestination: ctx.String("backend-assets-path"), - } - - cl := config.NewBackendConfigLoader() - ml := model.NewModelLoader(opts.ModelPath) - if err := cl.LoadBackendConfigsFromPath(ctx.String("models-path")); err != nil { - return err - } - - c, exists := cl.GetBackendConfig(modelOption) - if !exists { - return errors.New("model not found") - } - - c.Threads = &threads - - defer ml.StopAllGRPC() - - tr, err := backend.ModelTranscription(filename, language, ml, c, opts) - if err != nil { - return err - } - for _, segment := range tr.Segments { - fmt.Println(segment.Start.String(), "-", segment.Text) - } - return nil - }, - }, - }, + // Configure the logging level before we run the application + // This is here to preserve the existing --debug flag functionality + logLevel := "info" + if cli.CLI.Debug && cli.CLI.LogLevel == nil { + logLevel = "debug" + zerolog.SetGlobalLevel(zerolog.DebugLevel) + cli.CLI.LogLevel = &logLevel } - err = app.Run(os.Args) - if err != nil { - log.Error().Err(err).Msg("application runtime error") - os.Exit(1) + if cli.CLI.LogLevel == nil { + cli.CLI.LogLevel = &logLevel } + + switch *cli.CLI.LogLevel { + case "error": + log.Info().Msg("Setting logging to error") + zerolog.SetGlobalLevel(zerolog.ErrorLevel) + case "warn": + log.Info().Msg("Setting logging to warn") + zerolog.SetGlobalLevel(zerolog.WarnLevel) + case "info": + log.Info().Msg("Setting logging to info") + zerolog.SetGlobalLevel(zerolog.InfoLevel) + case "debug": + log.Info().Msg("Setting logging to debug") + zerolog.SetGlobalLevel(zerolog.DebugLevel) + } + + // Populate the application with the embedded backend assets + cli.CLI.Context.BackendAssets = backendAssets + + // Run the thing! 
+ err = ctx.Run(&cli.CLI.Context) + + ctx.FatalIfErrorf(err) } From 0e44a4e66442327fa4bf340322f2f85ca7308500 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 11 Apr 2024 09:19:46 +0200 Subject: [PATCH 0259/2895] :arrow_up: Update docs version mudler/LocalAI (#1988) Signed-off-by: GitHub Co-authored-by: mudler --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index cc0478ca..1b6a2161 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.12.1" + "version": "v2.12.3" } From e152b07b74cda26f2513fb85755c6b860e7ca65f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 11 Apr 2024 09:22:07 +0200 Subject: [PATCH 0260/2895] :arrow_up: Update ggerganov/llama.cpp (#1991) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 337ebc64..e2e4f211 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=1b67731e184e27a465b8c5476061294a4af668ea +CPPLLAMA_VERSION?=8228b66dbc16290c5cbd70e80ab47c068e2569d8 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From b4548ad72dc31a00a2a819c3bf540012bf11432a Mon Sep 17 00:00:00 2001 From: Ludovic Leroux Date: Thu, 11 Apr 2024 03:44:39 -0400 Subject: [PATCH 0261/2895] feat: add flash-attn in nvidia and rocm envs (#1995) Signed-off-by: Ludovic LEROUX --- .../python/common-env/transformers/install.sh | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh index 8502adde..30ec0de0 100644 --- a/backend/python/common-env/transformers/install.sh +++ b/backend/python/common-env/transformers/install.sh @@ -2,6 +2,7 @@ set -ex SKIP_CONDA=${SKIP_CONDA:-0} +REQUIREMENTS_FILE=$1 # Check if environment exist conda_env_exists(){ @@ -14,7 +15,7 @@ else export PATH=$PATH:/opt/conda/bin if conda_env_exists "transformers" ; then echo "Creating virtual environment..." - conda env create --name transformers --file $1 + conda env create --name transformers --file $REQUIREMENTS_FILE echo "Virtual environment created." else echo "Virtual environment already exists." 
@@ -28,11 +29,16 @@ if [ -d "/opt/intel" ]; then
     pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed optimum[openvino]
 fi

-if [ "$PIP_CACHE_PURGE" = true ] ; then
-    if [ $SKIP_CONDA -eq 0 ]; then
-        # Activate conda environment
-        source activate transformers
-    fi
+# If we didn't skip conda, activate the environment
+# to install FlashAttention
+if [ $SKIP_CONDA -eq 0 ]; then
+    source activate transformers
+fi
+if [[ $REQUIREMENTS_FILE =~ -nvidia.yml$ ]]; then
+    #TODO: FlashAttention is supported on nvidia and ROCm, but ROCm install can't be done this easily
+    pip install flash-attn --no-build-isolation
+fi
+if [ "$PIP_CACHE_PURGE" = true ] ; then
     pip cache purge
 fi
\ No newline at end of file

From c74dec7e387160fa4ab1fcebed94d8d9197fa1c4 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 11 Apr 2024 11:47:54 +0200
Subject: [PATCH 0262/2895] Add dependabot.yml

Signed-off-by: Ettore Di Giacinto
---
 .github/dependabot.yml | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 .github/dependabot.yml

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000..52abf1db
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,25 @@
+# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
+version: 2
+updates:
+  - package-ecosystem: "gomod"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "github-actions"
+    # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
+    directory: "/"
+    schedule:
+      # Check for updates to GitHub Actions every week
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    # Python requirements files are looked up from the repository root.
+    directory: "/"
+    schedule:
+      # Check for updates to Python dependencies every week
+      interval: "weekly"
+  - package-ecosystem: "docker"
+    # Dockerfiles are looked up from the repository root.
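+    # Dependabot's docker ecosystem watches the base images referenced in
+    # FROM lines and proposes updates to their tags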
+ directory: "/" + schedule: + # Check for updates to GitHub Actions every weekday + interval: "weekly" From 182fef339d801744c39420e10d27e98ee9f965e5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 11 Apr 2024 12:13:06 +0200 Subject: [PATCH 0263/2895] Create dependabot_auto.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/dependabot_auto.yml | 44 +++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 .github/workflows/dependabot_auto.yml diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml new file mode 100644 index 00000000..12541d05 --- /dev/null +++ b/.github/workflows/dependabot_auto.yml @@ -0,0 +1,44 @@ +name: Dependabot auto-merge +on: + pull_request_target: + types: [review_requested] + +permissions: + contents: write + pull-requests: write + packages: read + +jobs: + dependabot: + runs-on: ubuntu-latest + if: ${{ github.actor == 'dependabot[bot]' }} + steps: + - name: Dependabot metadata + id: metadata + uses: dependabot/fetch-metadata@v1.3.4 + with: + github-token: "${{ secrets.GITHUB_TOKEN }}" + skip-commit-verification: true + + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Approve a PR if not already approved + run: | + gh pr checkout "$PR_URL" + if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ]; + then + gh pr review --approve "$PR_URL" + else + echo "PR already approved."; + fi + env: + PR_URL: ${{github.event.pull_request.html_url}} + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} + + - name: Enable auto-merge for Dependabot PRs + if: ${{ contains(github.event.pull_request.title, 'bump')}} + run: gh pr merge --auto --merge "$PR_URL" + env: + PR_URL: ${{github.event.pull_request.html_url}} + GITHUB_TOKEN: ${{secrets.RELEASE_TOKEN}} From a49248d29f637c424a29aea28e4ef947cda99b9a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 11:07:45 +0000 Subject: [PATCH 0264/2895] build(deps): bump google.golang.org/protobuf from 1.31.0 to 1.33.0 (#1998) Bumps google.golang.org/protobuf from 1.31.0 to 1.33.0. --- updated-dependencies: - dependency-name: google.golang.org/protobuf dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 20 ++++++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index fac7acfd..d065436c 100644 --- a/go.mod +++ b/go.mod @@ -42,7 +42,7 @@ require ( go.opentelemetry.io/otel/metric v1.19.0 go.opentelemetry.io/otel/sdk/metric v1.19.0 google.golang.org/grpc v1.59.0 - google.golang.org/protobuf v1.31.0 + google.golang.org/protobuf v1.33.0 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 ) diff --git a/go.sum b/go.sum index dc08c465..8b3a8cc4 100644 --- a/go.sum +++ b/go.sum @@ -3,6 +3,8 @@ github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= +github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf h1:UgjXLcE9I+VaVz7uBIlzAnyZIXwiDlIiTWqCh159aUI= +github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf/go.mod h1:UOf2Mb/deUri5agct5OJ4SLWjhI+kZKbsUVUeRb24I0= github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g= @@ -64,6 +66,8 @@ github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKoh github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= +github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df h1:qVcBEZlvp5A1gGWNJj02xyDtbsUI2hohlQMSB1fgER4= +github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY= github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s= github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= @@ -72,6 +76,8 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e h1:KtbU2JR3lJuXFASHG2+sVLucfMPBjWKUUKByX6C81mQ= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs= github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA= @@ -93,6 +99,10 @@ github.com/go-openapi/spec v0.21.0 h1:LTVzPc3p/RzRnkQqLRndbAzjY0d0BCL72A6j3CdL9Z github.com/go-openapi/spec v0.21.0/go.mod h1:78u6VdPw81XU44qEWGhtr982gJ5BWg2c0I5XwVMotYk= 
github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1 h1:yXvc7QfGtoZ51tUW/YVjoTwAfh8HG88XU7UOrbNlz5Y= +github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1/go.mod h1:fYjkCDRzC+oRLHSjQoajmYK6AmeJnmEanV27CClAcDc= +github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 h1:WYjkXL0Nw7dN2uDBMVCWQ8xLavrIhjF/DLczuh5L9TY= +github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo= github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI= github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= @@ -217,12 +227,18 @@ github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdx github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 h1:rzf0wL0CHVc8CEsgyygG0Mn9CNCCPZqOPaz8RiiHYQk= github.com/moby/term v0.0.0-20201216013528-df9cb8a40635/go.mod h1:FBS0z0QWA44HXygs7VXDUOGoN/1TV3RuWkLO04am3wc= github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= +github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 h1:OFVkSxR7CRSRSNm5dvpMRZwmSwWa8EMMnHbc84fW5tU= +github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig= github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGwqBpN8N7BrSKC+nmdfw+9nPQIDyjHHlaIiitZI= github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c/go.mod h1:gY3wyrhkRySJtmtI/JPt4a2mKv48h/M9pEZIW+SjeC0= +github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af h1:XFq6OUqsWQam0OrEr05okXsJK/TQur3zoZTHbiZD3Ks= +github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw= github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s= github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8= github.com/muesli/termenv v0.13.0 h1:wK20DRpJdDX8b7Ek2QfhvqhRQFZ237RGRO0RQ/Iqdy0= github.com/muesli/termenv v0.13.0/go.mod h1:sP1+uffeLaEYpyOTb8pLCUctGcGLnoFjSn4YJK5e2bc= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530 h1:YXMxHwHMB9jCBo2Yu5gz3mTB3T1TnZs/HmPLv15LUSA= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ= github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= @@ -483,8 +499,8 @@ google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= -google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.33.0 
h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= From 079597548650a9665baa82b89e2eeafb66debcd6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 11:44:34 +0000 Subject: [PATCH 0265/2895] build(deps): bump github.com/docker/docker from 20.10.7+incompatible to 24.0.9+incompatible (#1999) build(deps): bump github.com/docker/docker Bumps [github.com/docker/docker](https://github.com/docker/docker) from 20.10.7+incompatible to 24.0.9+incompatible. - [Release notes](https://github.com/docker/docker/releases) - [Commits](https://github.com/docker/docker/compare/v20.10.7...v24.0.9) --- updated-dependencies: - dependency-name: github.com/docker/docker dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index d065436c..e60c7672 100644 --- a/go.mod +++ b/go.mod @@ -75,7 +75,7 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/dlclark/regexp2 v1.8.1 // indirect github.com/docker/cli v20.10.17+incompatible // indirect - github.com/docker/docker v20.10.7+incompatible // indirect + github.com/docker/docker v24.0.9+incompatible // indirect github.com/docker/go-connections v0.4.0 // indirect github.com/docker/go-units v0.4.0 // indirect github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect diff --git a/go.sum b/go.sum index 8b3a8cc4..2141db5f 100644 --- a/go.sum +++ b/go.sum @@ -60,8 +60,8 @@ github.com/dlclark/regexp2 v1.8.1 h1:6Lcdwya6GjPUNsBct8Lg/yRPwMhABj269AAzdGSiR+0 github.com/dlclark/regexp2 v1.8.1/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/docker/cli v20.10.17+incompatible h1:eO2KS7ZFeov5UJeaDmIs1NFEDRf32PaqRpvoEkKBy5M= github.com/docker/cli v20.10.17+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= -github.com/docker/docker v20.10.7+incompatible h1:Z6O9Nhsjv+ayUEeI1IojKbYcsGdgYSNqxe1s2MYzUhQ= -github.com/docker/docker v20.10.7+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/docker v24.0.9+incompatible h1:HPGzNmwfLZWdxHqK9/II92pyi1EpYKsAqcl4G0Of9v0= +github.com/docker/docker v24.0.9+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= From fdfd868953a9e40d4e1b9a2eb6d428645572311d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 12:21:52 +0000 Subject: [PATCH 0266/2895] build(deps): bump github.com/gofiber/fiber/v2 from 2.52.0 to 2.52.1 (#2001) Bumps [github.com/gofiber/fiber/v2](https://github.com/gofiber/fiber) from 2.52.0 to 2.52.1. 
- [Release notes](https://github.com/gofiber/fiber/releases) - [Commits](https://github.com/gofiber/fiber/compare/v2.52.0...v2.52.1) --- updated-dependencies: - dependency-name: github.com/gofiber/fiber/v2 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index e60c7672..238e150f 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( github.com/go-audio/wav v1.1.0 github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1 github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 - github.com/gofiber/fiber/v2 v2.52.0 + github.com/gofiber/fiber/v2 v2.52.1 github.com/gofiber/swagger v1.0.0 github.com/gofiber/template/html/v2 v2.1.1 github.com/google/uuid v1.5.0 diff --git a/go.sum b/go.sum index 2141db5f..c66e9b7c 100644 --- a/go.sum +++ b/go.sum @@ -110,8 +110,8 @@ github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEe github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gofiber/fiber/v2 v2.52.0 h1:S+qXi7y+/Pgvqq4DrSmREGiFwtB7Bu6+QFLuIHYw/UE= -github.com/gofiber/fiber/v2 v2.52.0/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ= +github.com/gofiber/fiber/v2 v2.52.1 h1:1RoU2NS+b98o1L77sdl5mboGPiW+0Ypsi5oLmcYlgHI= +github.com/gofiber/fiber/v2 v2.52.1/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ= github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc= github.com/gofiber/swagger v1.0.0/go.mod h1:QrYNF1Yrc7ggGK6ATsJ6yfH/8Zi5bu9lA7wB8TmCecg= github.com/gofiber/template v1.8.3 h1:hzHdvMwMo/T2kouz2pPCA0zGiLCeMnoGsQZBTSYgZxc= From 40781ac013e4fd2574f1faef0bf5a0d491317a34 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 12:48:30 +0000 Subject: [PATCH 0267/2895] build(deps): bump actions/checkout from 3 to 4 (#2002) Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/secscan.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/secscan.yaml b/.github/workflows/secscan.yaml index a5221b40..14958070 100644 --- a/.github/workflows/secscan.yaml +++ b/.github/workflows/secscan.yaml @@ -14,7 +14,7 @@ jobs: GO111MODULE: on steps: - name: Checkout Source - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Run Gosec Security Scanner uses: securego/gosec@master with: From 11a0418510aa2fba956251ec09dee442e906fcb2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 13:10:32 +0000 Subject: [PATCH 0268/2895] build(deps): bump actions/setup-go from 4 to 5 (#2003) Bumps [actions/setup-go](https://github.com/actions/setup-go) from 4 to 5. - [Release notes](https://github.com/actions/setup-go/releases) - [Commits](https://github.com/actions/setup-go/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/setup-go dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yaml | 6 +++--- .github/workflows/test.yml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 1d749189..269a10c5 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -33,7 +33,7 @@ jobs: uses: actions/checkout@v4 with: submodules: true - - uses: actions/setup-go@v4 + - uses: actions/setup-go@v5 with: go-version: '1.21.x' cache: false @@ -100,7 +100,7 @@ jobs: uses: actions/checkout@v4 with: submodules: true - - uses: actions/setup-go@v4 + - uses: actions/setup-go@v5 with: go-version: '1.21.x' cache: false @@ -138,7 +138,7 @@ jobs: uses: actions/checkout@v4 with: submodules: true - - uses: actions/setup-go@v4 + - uses: actions/setup-go@v5 with: go-version: '1.21.x' cache: false diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 95d10862..28a221bb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -60,7 +60,7 @@ jobs: with: submodules: true - name: Setup Go ${{ matrix.go-version }} - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 with: go-version: ${{ matrix.go-version }} cache: false @@ -177,7 +177,7 @@ jobs: with: submodules: true - name: Setup Go ${{ matrix.go-version }} - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 with: go-version: ${{ matrix.go-version }} cache: false From 821cf0e3fd80a14688a4ebb432d0b9e6cb8a3d31 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 13:58:04 +0000 Subject: [PATCH 0269/2895] build(deps): bump peter-evans/create-pull-request from 5 to 6 (#2005) Bumps [peter-evans/create-pull-request](https://github.com/peter-evans/create-pull-request) from 5 to 6. - [Release notes](https://github.com/peter-evans/create-pull-request/releases) - [Commits](https://github.com/peter-evans/create-pull-request/compare/v5...v6) --- updated-dependencies: - dependency-name: peter-evans/create-pull-request dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/bump_deps.yaml | 2 +- .github/workflows/bump_docs.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml index 756398d1..2abb2cab 100644 --- a/.github/workflows/bump_deps.yaml +++ b/.github/workflows/bump_deps.yaml @@ -49,7 +49,7 @@ jobs: run: | bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }} - name: Create Pull Request - uses: peter-evans/create-pull-request@v5 + uses: peter-evans/create-pull-request@v6 with: token: ${{ secrets.UPDATE_BOT_TOKEN }} push-to-fork: ci-forks/LocalAI diff --git a/.github/workflows/bump_docs.yaml b/.github/workflows/bump_docs.yaml index 7d52359f..c3ab1698 100644 --- a/.github/workflows/bump_docs.yaml +++ b/.github/workflows/bump_docs.yaml @@ -17,7 +17,7 @@ jobs: run: | bash .github/bump_docs.sh ${{ matrix.repository }} - name: Create Pull Request - uses: peter-evans/create-pull-request@v5 + uses: peter-evans/create-pull-request@v6 with: token: ${{ secrets.UPDATE_BOT_TOKEN }} push-to-fork: ci-forks/LocalAI From 0a6956b029593dd89ba8dfd8241e01c26d19b887 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 14:35:27 +0000 Subject: [PATCH 0270/2895] build(deps): bump actions/cache from 3 to 4 (#2006) Bumps [actions/cache](https://github.com/actions/cache) from 3 to 4. - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/cache dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yaml | 2 +- .github/workflows/test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 269a10c5..8341a188 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -55,7 +55,7 @@ jobs: sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION} - name: Cache grpc id: cache-grpc - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: grpc key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 28a221bb..02093b3f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -92,7 +92,7 @@ jobs: GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build - name: Cache grpc id: cache-grpc - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: grpc key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }} From b606c7b7680d5592e29228daa133d88b16fbae19 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 14:44:02 +0000 Subject: [PATCH 0271/2895] build(deps): bump actions/upload-artifact from 3 to 4 (#2007) Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 3 to 4. 
- [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 8341a188..8198fb3d 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -82,7 +82,7 @@ jobs: else STATIC=true make dist fi - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: ${{ matrix.build }} path: release/ @@ -111,7 +111,7 @@ jobs: run: | make backend-assets/grpc/stablediffusion mkdir -p release && cp backend-assets/grpc/stablediffusion release - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: stablediffusion path: release/ @@ -154,7 +154,7 @@ jobs: export C_INCLUDE_PATH=/usr/local/include export CPLUS_INCLUDE_PATH=/usr/local/include make dist - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: ${{ matrix.build }} path: release/ From fce606fc0f2a116b25dc51fa51118a6642ba34b3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 15:41:58 +0000 Subject: [PATCH 0272/2895] build(deps): bump github.com/charmbracelet/glamour from 0.6.0 to 0.7.0 (#2004) Bumps [github.com/charmbracelet/glamour](https://github.com/charmbracelet/glamour) from 0.6.0 to 0.7.0. - [Release notes](https://github.com/charmbracelet/glamour/releases) - [Commits](https://github.com/charmbracelet/glamour/compare/v0.6.0...v0.7.0) --- updated-dependencies: - dependency-name: github.com/charmbracelet/glamour dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 12 ++++++------ go.sum | 30 +++++++++++++----------------- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/go.mod b/go.mod index 238e150f..081c25d6 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.21 require ( github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf github.com/Masterminds/sprig/v3 v3.2.3 - github.com/charmbracelet/glamour v0.6.0 + github.com/charmbracelet/glamour v0.7.0 github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df github.com/fsnotify/fsnotify v1.7.0 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e @@ -65,8 +65,8 @@ require ( github.com/Masterminds/semver/v3 v3.2.0 // indirect github.com/Microsoft/go-winio v0.6.0 // indirect github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect - github.com/alecthomas/chroma v0.10.0 // indirect - github.com/aymanbagabas/go-osc52 v1.0.3 // indirect + github.com/alecthomas/chroma/v2 v2.8.0 // indirect + github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect github.com/aymerick/douceur v0.2.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v4 v4.1.3 // indirect @@ -104,7 +104,7 @@ require ( github.com/mitchellh/reflectwalk v1.0.0 // indirect github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 // indirect github.com/muesli/reflow v0.3.0 // indirect - github.com/muesli/termenv v0.13.0 // indirect + github.com/muesli/termenv v0.15.2 // indirect github.com/nwaples/rardecode v1.1.0 // indirect github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect @@ -126,8 +126,8 @@ require ( github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect github.com/xeipuuv/gojsonschema v1.2.0 // indirect github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect - github.com/yuin/goldmark v1.5.2 // indirect - github.com/yuin/goldmark-emoji v1.0.1 // indirect + github.com/yuin/goldmark v1.5.4 // indirect + github.com/yuin/goldmark-emoji v1.0.2 // indirect go.opentelemetry.io/otel/sdk v1.19.0 // indirect go.opentelemetry.io/otel/trace v1.19.0 // indirect golang.org/x/crypto v0.21.0 // indirect diff --git a/go.sum b/go.sum index c66e9b7c..359bc836 100644 --- a/go.sum +++ b/go.sum @@ -17,8 +17,8 @@ github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEV github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk= github.com/alecthomas/assert/v2 v2.6.0 h1:o3WJwILtexrEUk3cUVal3oiQY2tfgr/FHWiz/v2n4FU= github.com/alecthomas/assert/v2 v2.6.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= -github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek= -github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s= +github.com/alecthomas/chroma/v2 v2.8.0 h1:w9WJUjFFmHHB2e8mRpL9jjy3alYDlU0QLDezj1xE264= +github.com/alecthomas/chroma/v2 v2.8.0/go.mod h1:yrkMI9807G1ROx13fhe1v6PN2DDeaR73L3d+1nmYQtw= github.com/alecthomas/kong v0.9.0 h1:G5diXxc85KvoV2f0ZRVuMsi45IrBgx9zDNGNj165aPA= github.com/alecthomas/kong v0.9.0/go.mod h1:Y47y5gKfHp1hDc7CH7OeXgLIpp+Q2m1Ni0L5s3bI8Os= github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc= @@ -26,8 +26,8 @@ github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW5 github.com/andybalholm/brotli v1.0.1/go.mod 
h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= -github.com/aymanbagabas/go-osc52 v1.0.3 h1:DTwqENW7X9arYimJrPeGZcV0ln14sGMt3pHZspWD+Mg= -github.com/aymanbagabas/go-osc52 v1.0.3/go.mod h1:zT8H+Rk4VSabYN90pWyugflM3ZhpTZNC7cASDfUCdT4= +github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= +github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk= github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -36,8 +36,8 @@ github.com/cenkalti/backoff/v4 v4.1.3 h1:cFAlzYUlVYDysBEH2T5hyJZMh3+5+WCBvSnK6Q8 github.com/cenkalti/backoff/v4 v4.1.3/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/charmbracelet/glamour v0.6.0 h1:wi8fse3Y7nfcabbbDuwolqTqMQPMnVPeZhDM273bISc= -github.com/charmbracelet/glamour v0.6.0/go.mod h1:taqWV4swIMMbWALc0m7AfE9JkPSU8om2538k9ITBxOc= +github.com/charmbracelet/glamour v0.7.0 h1:2BtKGZ4iVJCDfMF229EzbeR1QRKLWztO9dMtjmqZSng= +github.com/charmbracelet/glamour v0.7.0/go.mod h1:jUMh5MeihljJPQbJ/wf4ldw2+yBP59+ctV36jASy7ps= github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= @@ -55,7 +55,6 @@ github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxG github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= github.com/dlclark/regexp2 v1.8.1 h1:6Lcdwya6GjPUNsBct8Lg/yRPwMhABj269AAzdGSiR+0= github.com/dlclark/regexp2 v1.8.1/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/docker/cli v20.10.17+incompatible h1:eO2KS7ZFeov5UJeaDmIs1NFEDRf32PaqRpvoEkKBy5M= @@ -212,7 +211,6 @@ github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zk github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= github.com/mholt/archiver/v3 v3.5.1 h1:rDjOBX9JSF5BvoJGvjqK479aL70qh9DIpZCl+k7Clwo= github.com/mholt/archiver/v3 v3.5.1/go.mod h1:e3dqJ7H78uzsRSEACH1joayhuSyhnonssnDhppzS1L4= -github.com/microcosm-cc/bluemonday v1.0.21/go.mod h1:ytNkv4RrDrLJ2pqlsSI46O6IVXmZOBBD4SaJyDwwTkM= github.com/microcosm-cc/bluemonday v1.0.26 h1:xbqSvqzQMeEHCqMi64VAs4d8uy6Mequs3rQ0k/Khz58= github.com/microcosm-cc/bluemonday v1.0.26/go.mod h1:JyzOCs9gkyQyjs+6h10UEVSe02CGwkhd72Xdqh78TWs= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= @@ -235,8 +233,8 @@ github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af h1:XFq6 github.com/mudler/go-stable-diffusion 
v0.0.0-20230605122230-d89260f598af/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw= github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s= github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8= -github.com/muesli/termenv v0.13.0 h1:wK20DRpJdDX8b7Ek2QfhvqhRQFZ237RGRO0RQ/Iqdy0= -github.com/muesli/termenv v0.13.0/go.mod h1:sP1+uffeLaEYpyOTb8pLCUctGcGLnoFjSn4YJK5e2bc= +github.com/muesli/termenv v0.15.2 h1:GohcuySI0QmI3wN8Ok9PtKGkgkFIk7y6Vpb5PvrY+Wo= +github.com/muesli/termenv v0.15.2/go.mod h1:Epx+iuz8sNs7mNKhxzH4fWXGNpZwUaJKRS1noLXviQ8= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530 h1:YXMxHwHMB9jCBo2Yu5gz3mTB3T1TnZs/HmPLv15LUSA= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ= @@ -330,7 +328,6 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= @@ -371,11 +368,12 @@ github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofm github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.3.7/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -github.com/yuin/goldmark v1.5.2 h1:ALmeCk/px5FSm1MAcFBAsVKZjDuMVj8Tm7FFIlMJnqU= -github.com/yuin/goldmark v1.5.2/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -github.com/yuin/goldmark-emoji v1.0.1 h1:ctuWEyzGBwiucEqxzwe0SOYDXPAucOrE9NQC18Wa1os= -github.com/yuin/goldmark-emoji v1.0.1/go.mod h1:2w1E6FEWLcDQkoTE+7HU6QF1F6SLlNGjRIBbIZQFqkQ= +github.com/yuin/goldmark v1.5.4 h1:2uY/xC0roWy8IBEGLgB1ywIoEJFGmRrX21YQcvGZzjU= +github.com/yuin/goldmark v1.5.4/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/yuin/goldmark-emoji v1.0.2 h1:c/RgTShNgHTtc6xdz2KKI74jJr6rWi7FPgnP9GAsO5s= +github.com/yuin/goldmark-emoji v1.0.2/go.mod h1:RhP/RWpexdp+KHs7ghKnifRoIs/Bq4nDS7tRbCkOwKY= github.com/yusufpapurcu/wmi v1.2.3 h1:E1ctvB7uKFMOJw3fdOW32DwGE9I7t++CRUEMKvFoFiw= github.com/yusufpapurcu/wmi v1.2.3/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs= @@ -413,7 +411,6 @@ golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= golang.org/x/net 
v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
-golang.org/x/net v0.0.0-20221002022538-bcab6841153b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
 golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
 golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc=
 golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
@@ -450,7 +447,6 @@ golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=

From b1a242251ca252127f830ae20de9d55d40c21e4c Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 11 Apr 2024 18:26:03 +0200
Subject: [PATCH 0273/2895] ci: fixup upload artifact name

Signed-off-by: Ettore Di Giacinto
---
 .github/workflows/release.yaml | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 8198fb3d..a69a2b05 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -84,7 +84,7 @@
         fi
       - uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.build }}
+          name: LocalAI-linux-${{ matrix.build }}
           path: release/
       - name: Release
         uses: softprops/action-gh-release@v1
@@ -115,12 +115,6 @@
         with:
           name: stablediffusion
           path: release/
-      - name: Release
-        uses: softprops/action-gh-release@v1
-        if: startsWith(github.ref, 'refs/tags/')
-        with:
-          files: |
-            release/*
 
   build-macOS:
     strategy:
@@ -156,7 +150,7 @@
           export C_INCLUDE_PATH=/usr/local/include
           export CPLUS_INCLUDE_PATH=/usr/local/include
           make dist
       - uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.build }}
+          name: LocalAI-MacOS-${{ matrix.build }}
           path: release/
       - name: Release
         uses: softprops/action-gh-release@v1

From cbda06fb96661e7c9386ccca1c6dcaf652083a70 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 11 Apr 2024 16:52:54 +0000
Subject: [PATCH 0274/2895] build(deps): bump github.com/gofiber/fiber/v2 from 2.52.0 to 2.52.4 (#2008)

Bumps [github.com/gofiber/fiber/v2](https://github.com/gofiber/fiber) from 2.52.0 to 2.52.4.
- [Release notes](https://github.com/gofiber/fiber/releases)
- [Commits](https://github.com/gofiber/fiber/compare/v2.52.0...v2.52.4)

---
updated-dependencies:
- dependency-name: github.com/gofiber/fiber/v2
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index 081c25d6..298f2d69 100644
--- a/go.mod
+++ b/go.mod
@@ -12,7 +12,7 @@ require (
 	github.com/go-audio/wav v1.1.0
 	github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1
 	github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428
-	github.com/gofiber/fiber/v2 v2.52.1
+	github.com/gofiber/fiber/v2 v2.52.4
 	github.com/gofiber/swagger v1.0.0
 	github.com/gofiber/template/html/v2 v2.1.1
 	github.com/google/uuid v1.5.0
diff --git a/go.sum b/go.sum
index 359bc836..551dd922 100644
--- a/go.sum
+++ b/go.sum
@@ -109,8 +109,8 @@ github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEe
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
 github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
-github.com/gofiber/fiber/v2 v2.52.1 h1:1RoU2NS+b98o1L77sdl5mboGPiW+0Ypsi5oLmcYlgHI=
-github.com/gofiber/fiber/v2 v2.52.1/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ=
+github.com/gofiber/fiber/v2 v2.52.4 h1:P+T+4iK7VaqUsq2PALYEfBBo6bJZ4q3FP8cZ84EggTM=
+github.com/gofiber/fiber/v2 v2.52.4/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ=
 github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc=
 github.com/gofiber/swagger v1.0.0/go.mod h1:QrYNF1Yrc7ggGK6ATsJ6yfH/8Zi5bu9lA7wB8TmCecg=
 github.com/gofiber/template v1.8.3 h1:hzHdvMwMo/T2kouz2pPCA0zGiLCeMnoGsQZBTSYgZxc=

From 12c0d9443ecfa7367e041b900a243e0c77726dce Mon Sep 17 00:00:00 2001
From: Ludovic Leroux
Date: Thu, 11 Apr 2024 13:20:22 -0400
Subject: [PATCH 0275/2895] feat: use tokenizer.apply_chat_template() in vLLM (#1990)

Use tokenizer.apply_chat_template() in vLLM

Signed-off-by: Ludovic LEROUX
---
 backend/backend.proto                         |   7 +
 backend/python/autogptq/backend_pb2.py        |  98 ++-
 backend/python/autogptq/backend_pb2_grpc.py   | 132 +++
 backend/python/bark/backend_pb2.py            |  98 ++-
 backend/python/bark/backend_pb2_grpc.py       | 132 +++
 backend/python/coqui/backend_pb2.py           |  98 ++-
 backend/python/coqui/backend_pb2_grpc.py      | 132 +++
 backend/python/diffusers/backend_pb2.py       |  98 ++-
 backend/python/diffusers/backend_pb2_grpc.py  | 132 +++
 backend/python/exllama/backend_pb2.py         |  98 ++-
 backend/python/exllama/backend_pb2_grpc.py    | 132 +++
 backend/python/exllama2/backend_pb2.py        |  98 ++-
 backend/python/exllama2/backend_pb2_grpc.py   | 132 +++
 backend/python/mamba/backend_pb2.py           |  98 ++-
 backend/python/mamba/backend_pb2_grpc.py      | 132 +++
 backend/python/petals/backend_pb2.py          |  98 ++-
 backend/python/petals/backend_pb2_grpc.py     | 132 +++
 .../sentencetransformers/backend_pb2.py       |  98 ++-
 .../sentencetransformers/backend_pb2_grpc.py  | 132 +++
 .../transformers-musicgen/backend_pb2.py      |  98 ++-
 .../transformers-musicgen/backend_pb2_grpc.py | 132 +++
 backend/python/transformers/backend_pb2.py    |  98 ++-
 .../python/transformers/backend_pb2_grpc.py   | 132 +++
 backend/python/vall-e-x/backend_pb2.py        |  98 ++-
 backend/python/vall-e-x/backend_pb2_grpc.py   | 132 +++
 backend/python/vllm/backend_pb2.py            |  98 ++-
 backend/python/vllm/backend_pb2_grpc.py       | 132 +++
 backend/python/vllm/backend_vllm.py           |  23 +-
 core/backend/llm.go                           |  25 +-
 core/config/backend_config.go                 |  11 +-
 core/http/endpoints/openai/chat.go            | 256 +++---
core/http/endpoints/openai/inference.go | 2 +- pkg/grpc/proto/backend.pb.go | 761 ++++++++++-------- pkg/grpc/proto/backend_grpc.pb.go | 2 +- 34 files changed, 3088 insertions(+), 989 deletions(-) diff --git a/backend/backend.proto b/backend/backend.proto index c3d3180b..56d919ef 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -107,6 +107,8 @@ message PredictOptions { string NegativePrompt = 40; int32 NDraft = 41; repeated string Images = 42; + bool UseTokenizerTemplate = 43; + repeated Message Messages = 44; } // The response message containing the result @@ -256,3 +258,8 @@ message StatusResponse { State state = 1; MemoryUsageData memory = 2; } + +message Message { + string role = 1; + string content = 2; +} \ No newline at end of file diff --git a/backend/python/autogptq/backend_pb2.py b/backend/python/autogptq/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/autogptq/backend_pb2.py +++ b/backend/python/autogptq/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
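# Illustrative sketch, not part of the generated module: the regenerated
# descriptor above now serializes the Stores* messages as well as the new
# `Message` type and the `UseTokenizerTemplate`/`Messages` fields that this
# patch adds to `PredictOptions` in backend.proto. A client built against
# this module could populate the new fields roughly like so (field names are
# taken from the proto diff above; values are placeholders):
#
#   import backend_pb2
#
#   opts = backend_pb2.PredictOptions(
#       UseTokenizerTemplate=True,
#       Messages=[backend_pb2.Message(role="user", content="Hello!")],
#   )
#   store_req = backend_pb2.StoresSetOptions(
#       Keys=[backend_pb2.StoresKey(Floats=[0.1, 0.2, 0.3])],
#       Values=[backend_pb2.StoresValue(Bytes=b"payload")],
#   )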
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' - _globals['_HEALTHMESSAGE']._serialized_start=26 - _globals['_HEALTHMESSAGE']._serialized_end=41 - _globals['_PREDICTOPTIONS']._serialized_start=44 - _globals['_PREDICTOPTIONS']._serialized_end=850 - _globals['_REPLY']._serialized_start=852 - _globals['_REPLY']._serialized_end=876 - _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1951 - _globals['_RESULT']._serialized_start=1953 - _globals['_RESULT']._serialized_end=1995 - _globals['_EMBEDDINGRESULT']._serialized_start=1997 - _globals['_EMBEDDINGRESULT']._serialized_end=2034 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2036 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2103 - _globals['_TRANSCRIPTRESULT']._serialized_start=2105 - _globals['_TRANSCRIPTRESULT']._serialized_end=2183 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 - _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2563 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 - _globals['_MEMORYUSAGEDATA']._serialized_start=2622 - _globals['_MEMORYUSAGEDATA']._serialized_end=2764 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 - _globals['_STATUSRESPONSE']._serialized_start=2767 - _globals['_STATUSRESPONSE']._serialized_end=2940 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 - _globals['_BACKEND']._serialized_start=2943 - _globals['_BACKEND']._serialized_end=3571 + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + _globals['_STORESKEY']._serialized_start=26 + _globals['_STORESKEY']._serialized_end=53 + _globals['_STORESVALUE']._serialized_start=55 + _globals['_STORESVALUE']._serialized_end=83 + _globals['_STORESSETOPTIONS']._serialized_start=85 + _globals['_STORESSETOPTIONS']._serialized_end=175 + _globals['_STORESDELETEOPTIONS']._serialized_start=177 + _globals['_STORESDELETEOPTIONS']._serialized_end=232 + _globals['_STORESGETOPTIONS']._serialized_start=234 + _globals['_STORESGETOPTIONS']._serialized_end=286 + _globals['_STORESGETRESULT']._serialized_start=288 + _globals['_STORESGETRESULT']._serialized_end=377 + _globals['_STORESFINDOPTIONS']._serialized_start=379 + _globals['_STORESFINDOPTIONS']._serialized_end=445 + _globals['_STORESFINDRESULT']._serialized_start=447 + _globals['_STORESFINDRESULT']._serialized_end=559 + _globals['_HEALTHMESSAGE']._serialized_start=561 + _globals['_HEALTHMESSAGE']._serialized_end=576 + 
_globals['_PREDICTOPTIONS']._serialized_start=579 + _globals['_PREDICTOPTIONS']._serialized_end=1451 + _globals['_REPLY']._serialized_start=1453 + _globals['_REPLY']._serialized_end=1477 + _globals['_MODELOPTIONS']._serialized_start=1480 + _globals['_MODELOPTIONS']._serialized_end=2552 + _globals['_RESULT']._serialized_start=2554 + _globals['_RESULT']._serialized_end=2596 + _globals['_EMBEDDINGRESULT']._serialized_start=2598 + _globals['_EMBEDDINGRESULT']._serialized_end=2635 + _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 + _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 + _globals['_TRANSCRIPTRESULT']._serialized_start=2706 + _globals['_TRANSCRIPTRESULT']._serialized_end=2784 + _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 + _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 + _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 + _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 + _globals['_TTSREQUEST']._serialized_start=3095 + _globals['_TTSREQUEST']._serialized_end=3164 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 + _globals['_MEMORYUSAGEDATA']._serialized_start=3223 + _globals['_MEMORYUSAGEDATA']._serialized_end=3365 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 + _globals['_STATUSRESPONSE']._serialized_start=3368 + _globals['_STATUSRESPONSE']._serialized_end=3541 + _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 + _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 + _globals['_MESSAGE']._serialized_start=3543 + _globals['_MESSAGE']._serialized_end=3583 + _globals['_BACKEND']._serialized_start=3586 + _globals['_BACKEND']._serialized_end=4477 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/autogptq/backend_pb2_grpc.py b/backend/python/autogptq/backend_pb2_grpc.py index 79a7677f..e06fccf3 100644 --- a/backend/python/autogptq/backend_pb2_grpc.py +++ b/backend/python/autogptq/backend_pb2_grpc.py @@ -64,6 +64,26 @@ class BackendStub(object): request_serializer=backend__pb2.HealthMessage.SerializeToString, response_deserializer=backend__pb2.StatusResponse.FromString, ) + self.StoresSet = channel.unary_unary( + '/backend.Backend/StoresSet', + request_serializer=backend__pb2.StoresSetOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresDelete = channel.unary_unary( + '/backend.Backend/StoresDelete', + request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresGet = channel.unary_unary( + '/backend.Backend/StoresGet', + request_serializer=backend__pb2.StoresGetOptions.SerializeToString, + response_deserializer=backend__pb2.StoresGetResult.FromString, + ) + self.StoresFind = channel.unary_unary( + '/backend.Backend/StoresFind', + request_serializer=backend__pb2.StoresFindOptions.SerializeToString, + response_deserializer=backend__pb2.StoresFindResult.FromString, + ) class BackendServicer(object): @@ -129,6 +149,30 @@ class BackendServicer(object): context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') + def StoresSet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresDelete(self, request, 
context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresGet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresFind(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + def add_BackendServicer_to_server(servicer, server): rpc_method_handlers = { @@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server): request_deserializer=backend__pb2.HealthMessage.FromString, response_serializer=backend__pb2.StatusResponse.SerializeToString, ), + 'StoresSet': grpc.unary_unary_rpc_method_handler( + servicer.StoresSet, + request_deserializer=backend__pb2.StoresSetOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresDelete': grpc.unary_unary_rpc_method_handler( + servicer.StoresDelete, + request_deserializer=backend__pb2.StoresDeleteOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresGet': grpc.unary_unary_rpc_method_handler( + servicer.StoresGet, + request_deserializer=backend__pb2.StoresGetOptions.FromString, + response_serializer=backend__pb2.StoresGetResult.SerializeToString, + ), + 'StoresFind': grpc.unary_unary_rpc_method_handler( + servicer.StoresFind, + request_deserializer=backend__pb2.StoresFindOptions.FromString, + response_serializer=backend__pb2.StoresFindResult.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( 'backend.Backend', rpc_method_handlers) @@ -361,3 +425,71 @@ class Backend(object): backend__pb2.StatusResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresSet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', + backend__pb2.StoresSetOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresDelete(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', + backend__pb2.StoresDeleteOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresGet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', + backend__pb2.StoresGetOptions.SerializeToString, + 
backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/bark/backend_pb2.py b/backend/python/bark/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/bark/backend_pb2.py +++ b/backend/python/bark/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
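The serialized descriptor above is not human-readable; decoded, it adds the vector-store messages (StoresKey, StoresValue, and the Set/Delete/Get/Find option and result types), a Message type, and the UseTokenizerTemplate and Messages fields on PredictOptions. A minimal sketch of constructing the new messages from Python, assuming the regenerated backend_pb2 module is importable (the values are illustrative):

import backend_pb2

# StoresKey carries an embedding vector (repeated float Floats = 1);
# StoresValue carries an opaque payload (bytes Bytes = 1).
key = backend_pb2.StoresKey(Floats=[0.1, 0.2, 0.3])
value = backend_pb2.StoresValue(Bytes=b"document-1")

# Set/Get/Delete operate on parallel Keys/Values lists.
set_opts = backend_pb2.StoresSetOptions(Keys=[key], Values=[value])

# Find takes a single query Key plus a result count (int32 TopK = 2).
find_opts = backend_pb2.StoresFindOptions(Key=key, TopK=5)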
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-
-  DESCRIPTOR._options = None
-  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
-  _globals['_HEALTHMESSAGE']._serialized_start=26
-  _globals['_HEALTHMESSAGE']._serialized_end=41
-  _globals['_PREDICTOPTIONS']._serialized_start=44
-  _globals['_PREDICTOPTIONS']._serialized_end=850
-  _globals['_REPLY']._serialized_start=852
-  _globals['_REPLY']._serialized_end=876
-  _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1951
-  _globals['_RESULT']._serialized_start=1953
-  _globals['_RESULT']._serialized_end=1995
-  _globals['_EMBEDDINGRESULT']._serialized_start=1997
-  _globals['_EMBEDDINGRESULT']._serialized_end=2034
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
-  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
-  _globals['_TTSREQUEST']._serialized_start=2494
-  _globals['_TTSREQUEST']._serialized_end=2563
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2622
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2764
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
-  _globals['_STATUSRESPONSE']._serialized_start=2767
-  _globals['_STATUSRESPONSE']._serialized_end=2940
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
-  _globals['_BACKEND']._serialized_start=2943
-  _globals['_BACKEND']._serialized_end=3571
+  _globals['DESCRIPTOR']._options = None
+  _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+  _globals['_STORESKEY']._serialized_start=26
+  _globals['_STORESKEY']._serialized_end=53
+  _globals['_STORESVALUE']._serialized_start=55
+  _globals['_STORESVALUE']._serialized_end=83
+  _globals['_STORESSETOPTIONS']._serialized_start=85
+  _globals['_STORESSETOPTIONS']._serialized_end=175
+  _globals['_STORESDELETEOPTIONS']._serialized_start=177
+  _globals['_STORESDELETEOPTIONS']._serialized_end=232
+  _globals['_STORESGETOPTIONS']._serialized_start=234
+  _globals['_STORESGETOPTIONS']._serialized_end=286
+  _globals['_STORESGETRESULT']._serialized_start=288
+  _globals['_STORESGETRESULT']._serialized_end=377
+  _globals['_STORESFINDOPTIONS']._serialized_start=379
+  _globals['_STORESFINDOPTIONS']._serialized_end=445
+  _globals['_STORESFINDRESULT']._serialized_start=447
+  _globals['_STORESFINDRESULT']._serialized_end=559
+  _globals['_HEALTHMESSAGE']._serialized_start=561
+  _globals['_HEALTHMESSAGE']._serialized_end=576
+  _globals['_PREDICTOPTIONS']._serialized_start=579
+  _globals['_PREDICTOPTIONS']._serialized_end=1451
+  _globals['_REPLY']._serialized_start=1453
+  _globals['_REPLY']._serialized_end=1477
+  _globals['_MODELOPTIONS']._serialized_start=1480
+  _globals['_MODELOPTIONS']._serialized_end=2552
+  _globals['_RESULT']._serialized_start=2554
+  _globals['_RESULT']._serialized_end=2596
+  _globals['_EMBEDDINGRESULT']._serialized_start=2598
+  _globals['_EMBEDDINGRESULT']._serialized_end=2635
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+  _globals['_TTSREQUEST']._serialized_start=3095
+  _globals['_TTSREQUEST']._serialized_end=3164
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+  _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+  _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+  _globals['_STATUSRESPONSE']._serialized_start=3368
+  _globals['_STATUSRESPONSE']._serialized_end=3541
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+  _globals['_MESSAGE']._serialized_start=3543
+  _globals['_MESSAGE']._serialized_end=3583
+  _globals['_BACKEND']._serialized_start=3586
+  _globals['_BACKEND']._serialized_end=4477
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/bark/backend_pb2_grpc.py b/backend/python/bark/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/bark/backend_pb2_grpc.py
+++ b/backend/python/bark/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
                 request_serializer=backend__pb2.HealthMessage.SerializeToString,
                 response_deserializer=backend__pb2.StatusResponse.FromString,
                 )
+        self.StoresSet = channel.unary_unary(
+                '/backend.Backend/StoresSet',
+                request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresDelete = channel.unary_unary(
+                '/backend.Backend/StoresDelete',
+                request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresGet = channel.unary_unary(
+                '/backend.Backend/StoresGet',
+                request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresGetResult.FromString,
+                )
+        self.StoresFind = channel.unary_unary(
+                '/backend.Backend/StoresFind',
+                request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresFindResult.FromString,
+                )
 
 
 class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
         context.set_details('Method not implemented!')
         raise NotImplementedError('Method not implemented!')
 
+    def StoresSet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresDelete(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresGet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresFind(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
 
 def add_BackendServicer_to_server(servicer, server):
     rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
                     request_deserializer=backend__pb2.HealthMessage.FromString,
                     response_serializer=backend__pb2.StatusResponse.SerializeToString,
             ),
+            'StoresSet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresSet,
+                    request_deserializer=backend__pb2.StoresSetOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresDelete': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresDelete,
+                    request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresGet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresGet,
+                    request_deserializer=backend__pb2.StoresGetOptions.FromString,
+                    response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+            ),
+            'StoresFind': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresFind,
+                    request_deserializer=backend__pb2.StoresFindOptions.FromString,
+                    response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+            ),
     }
     generic_handler = grpc.method_handlers_generic_handler(
             'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
             backend__pb2.StatusResponse.FromString,
             options, channel_credentials,
             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresSet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+            backend__pb2.StoresSetOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresDelete(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+            backend__pb2.StoresDeleteOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresGet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+            backend__pb2.StoresGetOptions.SerializeToString,
+            backend__pb2.StoresGetResult.FromString,
+
options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/coqui/backend_pb2.py b/backend/python/coqui/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/coqui/backend_pb2.py +++ b/backend/python/coqui/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
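The stub and servicer additions repeated below for each Python backend expose the same four store RPCs. A rough end-to-end client sketch, assuming a backend that actually implements them (the servicer stubs added in these diffs answer UNIMPLEMENTED until overridden) and an illustrative localhost:50051 address:

import grpc
import backend_pb2
import backend_pb2_grpc

with grpc.insecure_channel("localhost:50051") as channel:
    stub = backend_pb2_grpc.BackendStub(channel)
    # Store one embedding together with its payload.
    stub.StoresSet(backend_pb2.StoresSetOptions(
        Keys=[backend_pb2.StoresKey(Floats=[0.1, 0.2, 0.3])],
        Values=[backend_pb2.StoresValue(Bytes=b"document-1")],
    ))
    # Ask for the two nearest stored entries for a query vector.
    found = stub.StoresFind(backend_pb2.StoresFindOptions(
        Key=backend_pb2.StoresKey(Floats=[0.1, 0.2, 0.31]),
        TopK=2,
    ))
    for value, sim in zip(found.Values, found.Similarities):
        print(value.Bytes, sim)  # payload bytes and similarity score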
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-
-  DESCRIPTOR._options = None
-  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
-  _globals['_HEALTHMESSAGE']._serialized_start=26
-  _globals['_HEALTHMESSAGE']._serialized_end=41
-  _globals['_PREDICTOPTIONS']._serialized_start=44
-  _globals['_PREDICTOPTIONS']._serialized_end=850
-  _globals['_REPLY']._serialized_start=852
-  _globals['_REPLY']._serialized_end=876
-  _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1951
-  _globals['_RESULT']._serialized_start=1953
-  _globals['_RESULT']._serialized_end=1995
-  _globals['_EMBEDDINGRESULT']._serialized_start=1997
-  _globals['_EMBEDDINGRESULT']._serialized_end=2034
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
-  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
-  _globals['_TTSREQUEST']._serialized_start=2494
-  _globals['_TTSREQUEST']._serialized_end=2563
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2622
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2764
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
-  _globals['_STATUSRESPONSE']._serialized_start=2767
-  _globals['_STATUSRESPONSE']._serialized_end=2940
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
-  _globals['_BACKEND']._serialized_start=2943
-  _globals['_BACKEND']._serialized_end=3571
+  _globals['DESCRIPTOR']._options = None
+  _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+  _globals['_STORESKEY']._serialized_start=26
+  _globals['_STORESKEY']._serialized_end=53
+  _globals['_STORESVALUE']._serialized_start=55
+  _globals['_STORESVALUE']._serialized_end=83
+  _globals['_STORESSETOPTIONS']._serialized_start=85
+  _globals['_STORESSETOPTIONS']._serialized_end=175
+  _globals['_STORESDELETEOPTIONS']._serialized_start=177
+  _globals['_STORESDELETEOPTIONS']._serialized_end=232
+  _globals['_STORESGETOPTIONS']._serialized_start=234
+  _globals['_STORESGETOPTIONS']._serialized_end=286
+  _globals['_STORESGETRESULT']._serialized_start=288
+  _globals['_STORESGETRESULT']._serialized_end=377
+  _globals['_STORESFINDOPTIONS']._serialized_start=379
+  _globals['_STORESFINDOPTIONS']._serialized_end=445
+  _globals['_STORESFINDRESULT']._serialized_start=447
+  _globals['_STORESFINDRESULT']._serialized_end=559
+  _globals['_HEALTHMESSAGE']._serialized_start=561
+  _globals['_HEALTHMESSAGE']._serialized_end=576
+  _globals['_PREDICTOPTIONS']._serialized_start=579
+  _globals['_PREDICTOPTIONS']._serialized_end=1451
+  _globals['_REPLY']._serialized_start=1453
+  _globals['_REPLY']._serialized_end=1477
+  _globals['_MODELOPTIONS']._serialized_start=1480
+  _globals['_MODELOPTIONS']._serialized_end=2552
+  _globals['_RESULT']._serialized_start=2554
+  _globals['_RESULT']._serialized_end=2596
+  _globals['_EMBEDDINGRESULT']._serialized_start=2598
+  _globals['_EMBEDDINGRESULT']._serialized_end=2635
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+  _globals['_TTSREQUEST']._serialized_start=3095
+  _globals['_TTSREQUEST']._serialized_end=3164
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+  _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+  _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+  _globals['_STATUSRESPONSE']._serialized_start=3368
+  _globals['_STATUSRESPONSE']._serialized_end=3541
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+  _globals['_MESSAGE']._serialized_start=3543
+  _globals['_MESSAGE']._serialized_end=3583
+  _globals['_BACKEND']._serialized_start=3586
+  _globals['_BACKEND']._serialized_end=4477
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/coqui/backend_pb2_grpc.py b/backend/python/coqui/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/coqui/backend_pb2_grpc.py
+++ b/backend/python/coqui/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
                 request_serializer=backend__pb2.HealthMessage.SerializeToString,
                 response_deserializer=backend__pb2.StatusResponse.FromString,
                 )
+        self.StoresSet = channel.unary_unary(
+                '/backend.Backend/StoresSet',
+                request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresDelete = channel.unary_unary(
+                '/backend.Backend/StoresDelete',
+                request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresGet = channel.unary_unary(
+                '/backend.Backend/StoresGet',
+                request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresGetResult.FromString,
+                )
+        self.StoresFind = channel.unary_unary(
+                '/backend.Backend/StoresFind',
+                request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresFindResult.FromString,
+                )
 
 
 class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
         context.set_details('Method not implemented!')
         raise NotImplementedError('Method not implemented!')
 
+    def StoresSet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresDelete(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresGet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresFind(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
 
 def add_BackendServicer_to_server(servicer, server):
     rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
                     request_deserializer=backend__pb2.HealthMessage.FromString,
                     response_serializer=backend__pb2.StatusResponse.SerializeToString,
             ),
+            'StoresSet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresSet,
+                    request_deserializer=backend__pb2.StoresSetOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresDelete': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresDelete,
+                    request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresGet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresGet,
+                    request_deserializer=backend__pb2.StoresGetOptions.FromString,
+                    response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+            ),
+            'StoresFind': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresFind,
+                    request_deserializer=backend__pb2.StoresFindOptions.FromString,
+                    response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+            ),
     }
     generic_handler = grpc.method_handlers_generic_handler(
             'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
             backend__pb2.StatusResponse.FromString,
             options, channel_credentials,
             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresSet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+            backend__pb2.StoresSetOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresDelete(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+            backend__pb2.StoresDeleteOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresGet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+            backend__pb2.StoresGetOptions.SerializeToString,
+
backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/diffusers/backend_pb2.py b/backend/python/diffusers/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/diffusers/backend_pb2.py +++ b/backend/python/diffusers/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-
-  DESCRIPTOR._options = None
-  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
-  _globals['_HEALTHMESSAGE']._serialized_start=26
-  _globals['_HEALTHMESSAGE']._serialized_end=41
-  _globals['_PREDICTOPTIONS']._serialized_start=44
-  _globals['_PREDICTOPTIONS']._serialized_end=850
-  _globals['_REPLY']._serialized_start=852
-  _globals['_REPLY']._serialized_end=876
-  _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1951
-  _globals['_RESULT']._serialized_start=1953
-  _globals['_RESULT']._serialized_end=1995
-  _globals['_EMBEDDINGRESULT']._serialized_start=1997
-  _globals['_EMBEDDINGRESULT']._serialized_end=2034
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
-  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
-  _globals['_TTSREQUEST']._serialized_start=2494
-  _globals['_TTSREQUEST']._serialized_end=2563
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2622
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2764
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
-  _globals['_STATUSRESPONSE']._serialized_start=2767
-  _globals['_STATUSRESPONSE']._serialized_end=2940
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
-  _globals['_BACKEND']._serialized_start=2943
-  _globals['_BACKEND']._serialized_end=3571
+  _globals['DESCRIPTOR']._options = None
+  _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+  _globals['_STORESKEY']._serialized_start=26
+  _globals['_STORESKEY']._serialized_end=53
+  _globals['_STORESVALUE']._serialized_start=55
+  _globals['_STORESVALUE']._serialized_end=83
+  _globals['_STORESSETOPTIONS']._serialized_start=85
+  _globals['_STORESSETOPTIONS']._serialized_end=175
+  _globals['_STORESDELETEOPTIONS']._serialized_start=177
+  _globals['_STORESDELETEOPTIONS']._serialized_end=232
+  _globals['_STORESGETOPTIONS']._serialized_start=234
+  _globals['_STORESGETOPTIONS']._serialized_end=286
+  _globals['_STORESGETRESULT']._serialized_start=288
+  _globals['_STORESGETRESULT']._serialized_end=377
+  _globals['_STORESFINDOPTIONS']._serialized_start=379
+  _globals['_STORESFINDOPTIONS']._serialized_end=445
+  _globals['_STORESFINDRESULT']._serialized_start=447
+  _globals['_STORESFINDRESULT']._serialized_end=559
+  _globals['_HEALTHMESSAGE']._serialized_start=561
+  _globals['_HEALTHMESSAGE']._serialized_end=576
+  _globals['_PREDICTOPTIONS']._serialized_start=579
+  _globals['_PREDICTOPTIONS']._serialized_end=1451
+  _globals['_REPLY']._serialized_start=1453
+  _globals['_REPLY']._serialized_end=1477
+  _globals['_MODELOPTIONS']._serialized_start=1480
+  _globals['_MODELOPTIONS']._serialized_end=2552
+  _globals['_RESULT']._serialized_start=2554
+  _globals['_RESULT']._serialized_end=2596
+  _globals['_EMBEDDINGRESULT']._serialized_start=2598
+  _globals['_EMBEDDINGRESULT']._serialized_end=2635
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+  _globals['_TTSREQUEST']._serialized_start=3095
+  _globals['_TTSREQUEST']._serialized_end=3164
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+  _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+  _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+  _globals['_STATUSRESPONSE']._serialized_start=3368
+  _globals['_STATUSRESPONSE']._serialized_end=3541
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+  _globals['_MESSAGE']._serialized_start=3543
+  _globals['_MESSAGE']._serialized_end=3583
+  _globals['_BACKEND']._serialized_start=3586
+  _globals['_BACKEND']._serialized_end=4477
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/diffusers/backend_pb2_grpc.py b/backend/python/diffusers/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/diffusers/backend_pb2_grpc.py
+++ b/backend/python/diffusers/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
                 request_serializer=backend__pb2.HealthMessage.SerializeToString,
                 response_deserializer=backend__pb2.StatusResponse.FromString,
                 )
+        self.StoresSet = channel.unary_unary(
+                '/backend.Backend/StoresSet',
+                request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresDelete = channel.unary_unary(
+                '/backend.Backend/StoresDelete',
+                request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresGet = channel.unary_unary(
+                '/backend.Backend/StoresGet',
+                request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresGetResult.FromString,
+                )
+        self.StoresFind = channel.unary_unary(
+                '/backend.Backend/StoresFind',
+                request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresFindResult.FromString,
+                )


 class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
         context.set_details('Method not implemented!')
         raise NotImplementedError('Method not implemented!')

+    def StoresSet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresDelete(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresGet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresFind(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+

 def add_BackendServicer_to_server(servicer, server):
     rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
                     request_deserializer=backend__pb2.HealthMessage.FromString,
                     response_serializer=backend__pb2.StatusResponse.SerializeToString,
             ),
+            'StoresSet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresSet,
+                    request_deserializer=backend__pb2.StoresSetOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresDelete': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresDelete,
+                    request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresGet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresGet,
+                    request_deserializer=backend__pb2.StoresGetOptions.FromString,
+                    response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+            ),
+            'StoresFind': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresFind,
+                    request_deserializer=backend__pb2.StoresFindOptions.FromString,
+                    response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+            ),
     }
     generic_handler = grpc.method_handlers_generic_handler(
             'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
             backend__pb2.StatusResponse.FromString,
             options, channel_credentials,
             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresSet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+            backend__pb2.StoresSetOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresDelete(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+            backend__pb2.StoresDeleteOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresGet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+            backend__pb2.StoresGetOptions.SerializeToString,
+
backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/exllama/backend_pb2.py b/backend/python/exllama/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/exllama/backend_pb2.py +++ b/backend/python/exllama/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-
-  DESCRIPTOR._options = None
-  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
-  _globals['_HEALTHMESSAGE']._serialized_start=26
-  _globals['_HEALTHMESSAGE']._serialized_end=41
-  _globals['_PREDICTOPTIONS']._serialized_start=44
-  _globals['_PREDICTOPTIONS']._serialized_end=850
-  _globals['_REPLY']._serialized_start=852
-  _globals['_REPLY']._serialized_end=876
-  _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1951
-  _globals['_RESULT']._serialized_start=1953
-  _globals['_RESULT']._serialized_end=1995
-  _globals['_EMBEDDINGRESULT']._serialized_start=1997
-  _globals['_EMBEDDINGRESULT']._serialized_end=2034
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
-  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
-  _globals['_TTSREQUEST']._serialized_start=2494
-  _globals['_TTSREQUEST']._serialized_end=2563
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2622
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2764
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
-  _globals['_STATUSRESPONSE']._serialized_start=2767
-  _globals['_STATUSRESPONSE']._serialized_end=2940
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
-  _globals['_BACKEND']._serialized_start=2943
-  _globals['_BACKEND']._serialized_end=3571
+  _globals['DESCRIPTOR']._options = None
+  _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+  _globals['_STORESKEY']._serialized_start=26
+  _globals['_STORESKEY']._serialized_end=53
+  _globals['_STORESVALUE']._serialized_start=55
+  _globals['_STORESVALUE']._serialized_end=83
+  _globals['_STORESSETOPTIONS']._serialized_start=85
+  _globals['_STORESSETOPTIONS']._serialized_end=175
+  _globals['_STORESDELETEOPTIONS']._serialized_start=177
+  _globals['_STORESDELETEOPTIONS']._serialized_end=232
+  _globals['_STORESGETOPTIONS']._serialized_start=234
+  _globals['_STORESGETOPTIONS']._serialized_end=286
+  _globals['_STORESGETRESULT']._serialized_start=288
+  _globals['_STORESGETRESULT']._serialized_end=377
+  _globals['_STORESFINDOPTIONS']._serialized_start=379
+  _globals['_STORESFINDOPTIONS']._serialized_end=445
+  _globals['_STORESFINDRESULT']._serialized_start=447
+  _globals['_STORESFINDRESULT']._serialized_end=559
+  _globals['_HEALTHMESSAGE']._serialized_start=561
+  _globals['_HEALTHMESSAGE']._serialized_end=576
+  _globals['_PREDICTOPTIONS']._serialized_start=579
+  _globals['_PREDICTOPTIONS']._serialized_end=1451
+  _globals['_REPLY']._serialized_start=1453
+  _globals['_REPLY']._serialized_end=1477
+  _globals['_MODELOPTIONS']._serialized_start=1480
+  _globals['_MODELOPTIONS']._serialized_end=2552
+  _globals['_RESULT']._serialized_start=2554
+  _globals['_RESULT']._serialized_end=2596
+  _globals['_EMBEDDINGRESULT']._serialized_start=2598
+  _globals['_EMBEDDINGRESULT']._serialized_end=2635
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+  _globals['_TTSREQUEST']._serialized_start=3095
+  _globals['_TTSREQUEST']._serialized_end=3164
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+  _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+  _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+  _globals['_STATUSRESPONSE']._serialized_start=3368
+  _globals['_STATUSRESPONSE']._serialized_end=3541
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+  _globals['_MESSAGE']._serialized_start=3543
+  _globals['_MESSAGE']._serialized_end=3583
+  _globals['_BACKEND']._serialized_start=3586
+  _globals['_BACKEND']._serialized_end=4477
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/exllama/backend_pb2_grpc.py b/backend/python/exllama/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/exllama/backend_pb2_grpc.py
+++ b/backend/python/exllama/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
                 request_serializer=backend__pb2.HealthMessage.SerializeToString,
                 response_deserializer=backend__pb2.StatusResponse.FromString,
                 )
+        self.StoresSet = channel.unary_unary(
+                '/backend.Backend/StoresSet',
+                request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresDelete = channel.unary_unary(
+                '/backend.Backend/StoresDelete',
+                request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresGet = channel.unary_unary(
+                '/backend.Backend/StoresGet',
+                request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresGetResult.FromString,
+                )
+        self.StoresFind = channel.unary_unary(
+                '/backend.Backend/StoresFind',
+                request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresFindResult.FromString,
+                )


 class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
         context.set_details('Method not implemented!')
         raise NotImplementedError('Method not implemented!')

+    def StoresSet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresDelete(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresGet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresFind(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+

 def add_BackendServicer_to_server(servicer, server):
     rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
                     request_deserializer=backend__pb2.HealthMessage.FromString,
                     response_serializer=backend__pb2.StatusResponse.SerializeToString,
             ),
+            'StoresSet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresSet,
+                    request_deserializer=backend__pb2.StoresSetOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresDelete': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresDelete,
+                    request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresGet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresGet,
+                    request_deserializer=backend__pb2.StoresGetOptions.FromString,
+                    response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+            ),
+            'StoresFind': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresFind,
+                    request_deserializer=backend__pb2.StoresFindOptions.FromString,
+                    response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+            ),
     }
     generic_handler = grpc.method_handlers_generic_handler(
             'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
             backend__pb2.StatusResponse.FromString,
             options, channel_credentials,
             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresSet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+            backend__pb2.StoresSetOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresDelete(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+            backend__pb2.StoresDeleteOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresGet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+            backend__pb2.StoresGetOptions.SerializeToString,
+
backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/exllama2/backend_pb2.py b/backend/python/exllama2/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/exllama2/backend_pb2.py +++ b/backend/python/exllama2/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-
-  DESCRIPTOR._options = None
-  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
-  _globals['_HEALTHMESSAGE']._serialized_start=26
-  _globals['_HEALTHMESSAGE']._serialized_end=41
-  _globals['_PREDICTOPTIONS']._serialized_start=44
-  _globals['_PREDICTOPTIONS']._serialized_end=850
-  _globals['_REPLY']._serialized_start=852
-  _globals['_REPLY']._serialized_end=876
-  _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1951
-  _globals['_RESULT']._serialized_start=1953
-  _globals['_RESULT']._serialized_end=1995
-  _globals['_EMBEDDINGRESULT']._serialized_start=1997
-  _globals['_EMBEDDINGRESULT']._serialized_end=2034
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
-  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
-  _globals['_TTSREQUEST']._serialized_start=2494
-  _globals['_TTSREQUEST']._serialized_end=2563
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2622
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2764
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
-  _globals['_STATUSRESPONSE']._serialized_start=2767
-  _globals['_STATUSRESPONSE']._serialized_end=2940
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
-  _globals['_BACKEND']._serialized_start=2943
-  _globals['_BACKEND']._serialized_end=3571
+  _globals['DESCRIPTOR']._options = None
+  _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+  _globals['_STORESKEY']._serialized_start=26
+  _globals['_STORESKEY']._serialized_end=53
+  _globals['_STORESVALUE']._serialized_start=55
+  _globals['_STORESVALUE']._serialized_end=83
+  _globals['_STORESSETOPTIONS']._serialized_start=85
+  _globals['_STORESSETOPTIONS']._serialized_end=175
+  _globals['_STORESDELETEOPTIONS']._serialized_start=177
+  _globals['_STORESDELETEOPTIONS']._serialized_end=232
+  _globals['_STORESGETOPTIONS']._serialized_start=234
+  _globals['_STORESGETOPTIONS']._serialized_end=286
+  _globals['_STORESGETRESULT']._serialized_start=288
+  _globals['_STORESGETRESULT']._serialized_end=377
+  _globals['_STORESFINDOPTIONS']._serialized_start=379
+  _globals['_STORESFINDOPTIONS']._serialized_end=445
+  _globals['_STORESFINDRESULT']._serialized_start=447
+  _globals['_STORESFINDRESULT']._serialized_end=559
+  _globals['_HEALTHMESSAGE']._serialized_start=561
+  _globals['_HEALTHMESSAGE']._serialized_end=576
+  _globals['_PREDICTOPTIONS']._serialized_start=579
+  _globals['_PREDICTOPTIONS']._serialized_end=1451
+  _globals['_REPLY']._serialized_start=1453
+  _globals['_REPLY']._serialized_end=1477
+  _globals['_MODELOPTIONS']._serialized_start=1480
+  _globals['_MODELOPTIONS']._serialized_end=2552
+  _globals['_RESULT']._serialized_start=2554
+  _globals['_RESULT']._serialized_end=2596
+  _globals['_EMBEDDINGRESULT']._serialized_start=2598
+  _globals['_EMBEDDINGRESULT']._serialized_end=2635
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+  _globals['_TTSREQUEST']._serialized_start=3095
+  _globals['_TTSREQUEST']._serialized_end=3164
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+  _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+  _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+  _globals['_STATUSRESPONSE']._serialized_start=3368
+  _globals['_STATUSRESPONSE']._serialized_end=3541
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+  _globals['_MESSAGE']._serialized_start=3543
+  _globals['_MESSAGE']._serialized_end=3583
+  _globals['_BACKEND']._serialized_start=3586
+  _globals['_BACKEND']._serialized_end=4477
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/exllama2/backend_pb2_grpc.py b/backend/python/exllama2/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/exllama2/backend_pb2_grpc.py
+++ b/backend/python/exllama2/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
                 request_serializer=backend__pb2.HealthMessage.SerializeToString,
                 response_deserializer=backend__pb2.StatusResponse.FromString,
                 )
+        self.StoresSet = channel.unary_unary(
+                '/backend.Backend/StoresSet',
+                request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresDelete = channel.unary_unary(
+                '/backend.Backend/StoresDelete',
+                request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresGet = channel.unary_unary(
+                '/backend.Backend/StoresGet',
+                request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresGetResult.FromString,
+                )
+        self.StoresFind = channel.unary_unary(
+                '/backend.Backend/StoresFind',
+                request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresFindResult.FromString,
+                )
 
 
 class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
         context.set_details('Method not implemented!')
         raise NotImplementedError('Method not implemented!')
 
+    def StoresSet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresDelete(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresGet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresFind(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
 
 def add_BackendServicer_to_server(servicer, server):
     rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
             request_deserializer=backend__pb2.HealthMessage.FromString,
             response_serializer=backend__pb2.StatusResponse.SerializeToString,
             ),
+            'StoresSet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresSet,
+                    request_deserializer=backend__pb2.StoresSetOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresDelete': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresDelete,
+                    request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresGet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresGet,
+                    request_deserializer=backend__pb2.StoresGetOptions.FromString,
+                    response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+            ),
+            'StoresFind': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresFind,
+                    request_deserializer=backend__pb2.StoresFindOptions.FromString,
+                    response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+            ),
     }
     generic_handler = grpc.method_handlers_generic_handler(
             'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
             backend__pb2.StatusResponse.FromString,
             options, channel_credentials,
             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresSet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+            backend__pb2.StoresSetOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresDelete(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+            backend__pb2.StoresDeleteOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresGet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+            backend__pb2.StoresGetOptions.SerializeToString,
+
backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/mamba/backend_pb2.py b/backend/python/mamba/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/mamba/backend_pb2.py +++ b/backend/python/mamba/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-
-  DESCRIPTOR._options = None
-  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
-  _globals['_HEALTHMESSAGE']._serialized_start=26
-  _globals['_HEALTHMESSAGE']._serialized_end=41
-  _globals['_PREDICTOPTIONS']._serialized_start=44
-  _globals['_PREDICTOPTIONS']._serialized_end=850
-  _globals['_REPLY']._serialized_start=852
-  _globals['_REPLY']._serialized_end=876
-  _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1951
-  _globals['_RESULT']._serialized_start=1953
-  _globals['_RESULT']._serialized_end=1995
-  _globals['_EMBEDDINGRESULT']._serialized_start=1997
-  _globals['_EMBEDDINGRESULT']._serialized_end=2034
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
-  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
-  _globals['_TTSREQUEST']._serialized_start=2494
-  _globals['_TTSREQUEST']._serialized_end=2563
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2622
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2764
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
-  _globals['_STATUSRESPONSE']._serialized_start=2767
-  _globals['_STATUSRESPONSE']._serialized_end=2940
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
-  _globals['_BACKEND']._serialized_start=2943
-  _globals['_BACKEND']._serialized_end=3571
+  _globals['DESCRIPTOR']._options = None
+  _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+  _globals['_STORESKEY']._serialized_start=26
+  _globals['_STORESKEY']._serialized_end=53
+  _globals['_STORESVALUE']._serialized_start=55
+  _globals['_STORESVALUE']._serialized_end=83
+  _globals['_STORESSETOPTIONS']._serialized_start=85
+  _globals['_STORESSETOPTIONS']._serialized_end=175
+  _globals['_STORESDELETEOPTIONS']._serialized_start=177
+  _globals['_STORESDELETEOPTIONS']._serialized_end=232
+  _globals['_STORESGETOPTIONS']._serialized_start=234
+  _globals['_STORESGETOPTIONS']._serialized_end=286
+  _globals['_STORESGETRESULT']._serialized_start=288
+  _globals['_STORESGETRESULT']._serialized_end=377
+  _globals['_STORESFINDOPTIONS']._serialized_start=379
+  _globals['_STORESFINDOPTIONS']._serialized_end=445
+  _globals['_STORESFINDRESULT']._serialized_start=447
+  _globals['_STORESFINDRESULT']._serialized_end=559
+  _globals['_HEALTHMESSAGE']._serialized_start=561
+  _globals['_HEALTHMESSAGE']._serialized_end=576
+  _globals['_PREDICTOPTIONS']._serialized_start=579
+  _globals['_PREDICTOPTIONS']._serialized_end=1451
+  _globals['_REPLY']._serialized_start=1453
+  _globals['_REPLY']._serialized_end=1477
+  _globals['_MODELOPTIONS']._serialized_start=1480
+  _globals['_MODELOPTIONS']._serialized_end=2552
+  _globals['_RESULT']._serialized_start=2554
+  _globals['_RESULT']._serialized_end=2596
+  _globals['_EMBEDDINGRESULT']._serialized_start=2598
+  _globals['_EMBEDDINGRESULT']._serialized_end=2635
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+  _globals['_TTSREQUEST']._serialized_start=3095
+  _globals['_TTSREQUEST']._serialized_end=3164
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+  _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+  _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+  _globals['_STATUSRESPONSE']._serialized_start=3368
+  _globals['_STATUSRESPONSE']._serialized_end=3541
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+  _globals['_MESSAGE']._serialized_start=3543
+  _globals['_MESSAGE']._serialized_end=3583
+  _globals['_BACKEND']._serialized_start=3586
+  _globals['_BACKEND']._serialized_end=4477
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/mamba/backend_pb2_grpc.py b/backend/python/mamba/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/mamba/backend_pb2_grpc.py
+++ b/backend/python/mamba/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
                 request_serializer=backend__pb2.HealthMessage.SerializeToString,
                 response_deserializer=backend__pb2.StatusResponse.FromString,
                 )
+        self.StoresSet = channel.unary_unary(
+                '/backend.Backend/StoresSet',
+                request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresDelete = channel.unary_unary(
+                '/backend.Backend/StoresDelete',
+                request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresGet = channel.unary_unary(
+                '/backend.Backend/StoresGet',
+                request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresGetResult.FromString,
+                )
+        self.StoresFind = channel.unary_unary(
+                '/backend.Backend/StoresFind',
+                request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresFindResult.FromString,
+                )
 
 
 class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
         context.set_details('Method not implemented!')
         raise NotImplementedError('Method not implemented!')
 
+    def StoresSet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresDelete(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresGet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresFind(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
 
 def add_BackendServicer_to_server(servicer, server):
     rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
             request_deserializer=backend__pb2.HealthMessage.FromString,
             response_serializer=backend__pb2.StatusResponse.SerializeToString,
             ),
+            'StoresSet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresSet,
+                    request_deserializer=backend__pb2.StoresSetOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresDelete': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresDelete,
+                    request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresGet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresGet,
+                    request_deserializer=backend__pb2.StoresGetOptions.FromString,
+                    response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+            ),
+            'StoresFind': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresFind,
+                    request_deserializer=backend__pb2.StoresFindOptions.FromString,
+                    response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+            ),
     }
     generic_handler = grpc.method_handlers_generic_handler(
             'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
             backend__pb2.StatusResponse.FromString,
             options, channel_credentials,
             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresSet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+            backend__pb2.StoresSetOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresDelete(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+            backend__pb2.StoresDeleteOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresGet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+            backend__pb2.StoresGetOptions.SerializeToString,
+
backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/petals/backend_pb2.py b/backend/python/petals/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/petals/backend_pb2.py +++ b/backend/python/petals/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-
-  DESCRIPTOR._options = None
-  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
-  _globals['_HEALTHMESSAGE']._serialized_start=26
-  _globals['_HEALTHMESSAGE']._serialized_end=41
-  _globals['_PREDICTOPTIONS']._serialized_start=44
-  _globals['_PREDICTOPTIONS']._serialized_end=850
-  _globals['_REPLY']._serialized_start=852
-  _globals['_REPLY']._serialized_end=876
-  _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1951
-  _globals['_RESULT']._serialized_start=1953
-  _globals['_RESULT']._serialized_end=1995
-  _globals['_EMBEDDINGRESULT']._serialized_start=1997
-  _globals['_EMBEDDINGRESULT']._serialized_end=2034
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
-  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
-  _globals['_TTSREQUEST']._serialized_start=2494
-  _globals['_TTSREQUEST']._serialized_end=2563
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2622
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2764
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
-  _globals['_STATUSRESPONSE']._serialized_start=2767
-  _globals['_STATUSRESPONSE']._serialized_end=2940
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
-  _globals['_BACKEND']._serialized_start=2943
-  _globals['_BACKEND']._serialized_end=3571
+  _globals['DESCRIPTOR']._options = None
+  _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+  _globals['_STORESKEY']._serialized_start=26
+  _globals['_STORESKEY']._serialized_end=53
+  _globals['_STORESVALUE']._serialized_start=55
+  _globals['_STORESVALUE']._serialized_end=83
+  _globals['_STORESSETOPTIONS']._serialized_start=85
+  _globals['_STORESSETOPTIONS']._serialized_end=175
+  _globals['_STORESDELETEOPTIONS']._serialized_start=177
+  _globals['_STORESDELETEOPTIONS']._serialized_end=232
+  _globals['_STORESGETOPTIONS']._serialized_start=234
+  _globals['_STORESGETOPTIONS']._serialized_end=286
+  _globals['_STORESGETRESULT']._serialized_start=288
+  _globals['_STORESGETRESULT']._serialized_end=377
+  _globals['_STORESFINDOPTIONS']._serialized_start=379
+  _globals['_STORESFINDOPTIONS']._serialized_end=445
+  _globals['_STORESFINDRESULT']._serialized_start=447
+  _globals['_STORESFINDRESULT']._serialized_end=559
+  _globals['_HEALTHMESSAGE']._serialized_start=561
+  _globals['_HEALTHMESSAGE']._serialized_end=576
+  _globals['_PREDICTOPTIONS']._serialized_start=579
+  _globals['_PREDICTOPTIONS']._serialized_end=1451
+  _globals['_REPLY']._serialized_start=1453
+  _globals['_REPLY']._serialized_end=1477
+  _globals['_MODELOPTIONS']._serialized_start=1480
+  _globals['_MODELOPTIONS']._serialized_end=2552
+  _globals['_RESULT']._serialized_start=2554
+  _globals['_RESULT']._serialized_end=2596
+  _globals['_EMBEDDINGRESULT']._serialized_start=2598
+  _globals['_EMBEDDINGRESULT']._serialized_end=2635
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+  _globals['_TTSREQUEST']._serialized_start=3095
+  _globals['_TTSREQUEST']._serialized_end=3164
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+  _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+  _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+  _globals['_STATUSRESPONSE']._serialized_start=3368
+  _globals['_STATUSRESPONSE']._serialized_end=3541
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+  _globals['_MESSAGE']._serialized_start=3543
+  _globals['_MESSAGE']._serialized_end=3583
+  _globals['_BACKEND']._serialized_start=3586
+  _globals['_BACKEND']._serialized_end=4477
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/petals/backend_pb2_grpc.py b/backend/python/petals/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/petals/backend_pb2_grpc.py
+++ b/backend/python/petals/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
                 request_serializer=backend__pb2.HealthMessage.SerializeToString,
                 response_deserializer=backend__pb2.StatusResponse.FromString,
                 )
+        self.StoresSet = channel.unary_unary(
+                '/backend.Backend/StoresSet',
+                request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresDelete = channel.unary_unary(
+                '/backend.Backend/StoresDelete',
+                request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresGet = channel.unary_unary(
+                '/backend.Backend/StoresGet',
+                request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresGetResult.FromString,
+                )
+        self.StoresFind = channel.unary_unary(
+                '/backend.Backend/StoresFind',
+                request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresFindResult.FromString,
+                )
 
 
 class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
         context.set_details('Method not implemented!')
         raise NotImplementedError('Method not implemented!')
 
+    def StoresSet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresDelete(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresGet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresFind(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
 
 def add_BackendServicer_to_server(servicer, server):
     rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
             request_deserializer=backend__pb2.HealthMessage.FromString,
             response_serializer=backend__pb2.StatusResponse.SerializeToString,
             ),
+            'StoresSet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresSet,
+                    request_deserializer=backend__pb2.StoresSetOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresDelete': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresDelete,
+                    request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresGet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresGet,
+                    request_deserializer=backend__pb2.StoresGetOptions.FromString,
+                    response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+            ),
+            'StoresFind': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresFind,
+                    request_deserializer=backend__pb2.StoresFindOptions.FromString,
+                    response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+            ),
     }
     generic_handler = grpc.method_handlers_generic_handler(
             'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
             backend__pb2.StatusResponse.FromString,
             options, channel_credentials,
             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresSet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+            backend__pb2.StoresSetOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresDelete(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+            backend__pb2.StoresDeleteOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresGet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+            backend__pb2.StoresGetOptions.SerializeToString,
+
backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/sentencetransformers/backend_pb2.py b/backend/python/sentencetransformers/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/sentencetransformers/backend_pb2.py +++ b/backend/python/sentencetransformers/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
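# --- Editor's illustrative sketch (not part of the generated diff) ---------
# The regenerated stubs in this patch add four key/value-store RPCs to every
# Python backend: StoresSet, StoresDelete, StoresGet and StoresFind. A
# minimal, hypothetical client built only from the message shapes visible in
# the descriptor above (StoresKey.Floats, StoresValue.Bytes,
# StoresFindOptions.Key/TopK) might look like this; the channel address is an
# assumption for illustration, not something defined by the patch.
import grpc
import backend_pb2
import backend_pb2_grpc

def stores_demo(address="localhost:50051"):  # placeholder address (assumption)
    with grpc.insecure_channel(address) as channel:
        stub = backend_pb2_grpc.BackendStub(channel)
        key = backend_pb2.StoresKey(Floats=[0.1, 0.2, 0.3])
        val = backend_pb2.StoresValue(Bytes=b"payload")
        # Upsert one vector key together with its opaque byte value.
        res = stub.StoresSet(backend_pb2.StoresSetOptions(Keys=[key], Values=[val]))
        assert res.success, res.message
        # Similarity search: top-1 neighbour of the same key.
        found = stub.StoresFind(backend_pb2.StoresFindOptions(Key=key, TopK=1))
        for k, v, sim in zip(found.Keys, found.Values, found.Similarities):
            print(list(k.Floats), v.Bytes, sim)
# ---------------------------------------------------------------------------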
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' - _globals['_HEALTHMESSAGE']._serialized_start=26 - _globals['_HEALTHMESSAGE']._serialized_end=41 - _globals['_PREDICTOPTIONS']._serialized_start=44 - _globals['_PREDICTOPTIONS']._serialized_end=850 - _globals['_REPLY']._serialized_start=852 - _globals['_REPLY']._serialized_end=876 - _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1951 - _globals['_RESULT']._serialized_start=1953 - _globals['_RESULT']._serialized_end=1995 - _globals['_EMBEDDINGRESULT']._serialized_start=1997 - _globals['_EMBEDDINGRESULT']._serialized_end=2034 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2036 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2103 - _globals['_TRANSCRIPTRESULT']._serialized_start=2105 - _globals['_TRANSCRIPTRESULT']._serialized_end=2183 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 - _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2563 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 - _globals['_MEMORYUSAGEDATA']._serialized_start=2622 - _globals['_MEMORYUSAGEDATA']._serialized_end=2764 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 - _globals['_STATUSRESPONSE']._serialized_start=2767 - _globals['_STATUSRESPONSE']._serialized_end=2940 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 - _globals['_BACKEND']._serialized_start=2943 - _globals['_BACKEND']._serialized_end=3571 + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + _globals['_STORESKEY']._serialized_start=26 + _globals['_STORESKEY']._serialized_end=53 + _globals['_STORESVALUE']._serialized_start=55 + _globals['_STORESVALUE']._serialized_end=83 + _globals['_STORESSETOPTIONS']._serialized_start=85 + _globals['_STORESSETOPTIONS']._serialized_end=175 + _globals['_STORESDELETEOPTIONS']._serialized_start=177 + _globals['_STORESDELETEOPTIONS']._serialized_end=232 + _globals['_STORESGETOPTIONS']._serialized_start=234 + _globals['_STORESGETOPTIONS']._serialized_end=286 + _globals['_STORESGETRESULT']._serialized_start=288 + _globals['_STORESGETRESULT']._serialized_end=377 + _globals['_STORESFINDOPTIONS']._serialized_start=379 + _globals['_STORESFINDOPTIONS']._serialized_end=445 + _globals['_STORESFINDRESULT']._serialized_start=447 + _globals['_STORESFINDRESULT']._serialized_end=559 + _globals['_HEALTHMESSAGE']._serialized_start=561 + _globals['_HEALTHMESSAGE']._serialized_end=576 + 
_globals['_PREDICTOPTIONS']._serialized_start=579 + _globals['_PREDICTOPTIONS']._serialized_end=1451 + _globals['_REPLY']._serialized_start=1453 + _globals['_REPLY']._serialized_end=1477 + _globals['_MODELOPTIONS']._serialized_start=1480 + _globals['_MODELOPTIONS']._serialized_end=2552 + _globals['_RESULT']._serialized_start=2554 + _globals['_RESULT']._serialized_end=2596 + _globals['_EMBEDDINGRESULT']._serialized_start=2598 + _globals['_EMBEDDINGRESULT']._serialized_end=2635 + _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 + _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 + _globals['_TRANSCRIPTRESULT']._serialized_start=2706 + _globals['_TRANSCRIPTRESULT']._serialized_end=2784 + _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 + _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 + _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 + _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 + _globals['_TTSREQUEST']._serialized_start=3095 + _globals['_TTSREQUEST']._serialized_end=3164 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 + _globals['_MEMORYUSAGEDATA']._serialized_start=3223 + _globals['_MEMORYUSAGEDATA']._serialized_end=3365 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 + _globals['_STATUSRESPONSE']._serialized_start=3368 + _globals['_STATUSRESPONSE']._serialized_end=3541 + _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 + _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 + _globals['_MESSAGE']._serialized_start=3543 + _globals['_MESSAGE']._serialized_end=3583 + _globals['_BACKEND']._serialized_start=3586 + _globals['_BACKEND']._serialized_end=4477 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/sentencetransformers/backend_pb2_grpc.py b/backend/python/sentencetransformers/backend_pb2_grpc.py index 79a7677f..e06fccf3 100644 --- a/backend/python/sentencetransformers/backend_pb2_grpc.py +++ b/backend/python/sentencetransformers/backend_pb2_grpc.py @@ -64,6 +64,26 @@ class BackendStub(object): request_serializer=backend__pb2.HealthMessage.SerializeToString, response_deserializer=backend__pb2.StatusResponse.FromString, ) + self.StoresSet = channel.unary_unary( + '/backend.Backend/StoresSet', + request_serializer=backend__pb2.StoresSetOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresDelete = channel.unary_unary( + '/backend.Backend/StoresDelete', + request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresGet = channel.unary_unary( + '/backend.Backend/StoresGet', + request_serializer=backend__pb2.StoresGetOptions.SerializeToString, + response_deserializer=backend__pb2.StoresGetResult.FromString, + ) + self.StoresFind = channel.unary_unary( + '/backend.Backend/StoresFind', + request_serializer=backend__pb2.StoresFindOptions.SerializeToString, + response_deserializer=backend__pb2.StoresFindResult.FromString, + ) class BackendServicer(object): @@ -129,6 +149,30 @@ class BackendServicer(object): context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') + def StoresSet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not 
implemented!') + + def StoresDelete(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresGet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresFind(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + def add_BackendServicer_to_server(servicer, server): rpc_method_handlers = { @@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server): request_deserializer=backend__pb2.HealthMessage.FromString, response_serializer=backend__pb2.StatusResponse.SerializeToString, ), + 'StoresSet': grpc.unary_unary_rpc_method_handler( + servicer.StoresSet, + request_deserializer=backend__pb2.StoresSetOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresDelete': grpc.unary_unary_rpc_method_handler( + servicer.StoresDelete, + request_deserializer=backend__pb2.StoresDeleteOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresGet': grpc.unary_unary_rpc_method_handler( + servicer.StoresGet, + request_deserializer=backend__pb2.StoresGetOptions.FromString, + response_serializer=backend__pb2.StoresGetResult.SerializeToString, + ), + 'StoresFind': grpc.unary_unary_rpc_method_handler( + servicer.StoresFind, + request_deserializer=backend__pb2.StoresFindOptions.FromString, + response_serializer=backend__pb2.StoresFindResult.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( 'backend.Backend', rpc_method_handlers) @@ -361,3 +425,71 @@ class Backend(object): backend__pb2.StatusResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresSet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', + backend__pb2.StoresSetOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresDelete(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', + backend__pb2.StoresDeleteOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresGet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', + 
backend__pb2.StoresGetOptions.SerializeToString, + backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/transformers-musicgen/backend_pb2.py b/backend/python/transformers-musicgen/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/transformers-musicgen/backend_pb2.py +++ b/backend/python/transformers-musicgen/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
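# --- Editor's illustrative sketch (not part of the generated diff) ---------
# On the server side, a backend opts into the new store API by overriding the
# generated BackendServicer handlers, which otherwise answer UNIMPLEMENTED
# (as the stub bodies in this patch show). The in-memory dict, port and
# worker count below are assumptions for illustration only.
from concurrent import futures
import grpc
import backend_pb2
import backend_pb2_grpc

class InMemoryStoreServicer(backend_pb2_grpc.BackendServicer):
    def __init__(self):
        self._store = {}  # tuple of key floats -> value bytes

    def StoresSet(self, request, context):
        for k, v in zip(request.Keys, request.Values):
            self._store[tuple(k.Floats)] = v.Bytes
        return backend_pb2.Result(success=True)

    def StoresGet(self, request, context):
        keys, values = [], []
        for k in request.Keys:
            data = self._store.get(tuple(k.Floats))
            if data is not None:
                keys.append(k)
                values.append(backend_pb2.StoresValue(Bytes=data))
        return backend_pb2.StoresGetResult(Keys=keys, Values=values)

def serve(port=50051):  # placeholder port (assumption)
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
    backend_pb2_grpc.add_BackendServicer_to_server(InMemoryStoreServicer(), server)
    server.add_insecure_port(f"[::]:{port}")
    server.start()
    server.wait_for_termination()
# ---------------------------------------------------------------------------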
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' - _globals['_HEALTHMESSAGE']._serialized_start=26 - _globals['_HEALTHMESSAGE']._serialized_end=41 - _globals['_PREDICTOPTIONS']._serialized_start=44 - _globals['_PREDICTOPTIONS']._serialized_end=850 - _globals['_REPLY']._serialized_start=852 - _globals['_REPLY']._serialized_end=876 - _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1951 - _globals['_RESULT']._serialized_start=1953 - _globals['_RESULT']._serialized_end=1995 - _globals['_EMBEDDINGRESULT']._serialized_start=1997 - _globals['_EMBEDDINGRESULT']._serialized_end=2034 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2036 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2103 - _globals['_TRANSCRIPTRESULT']._serialized_start=2105 - _globals['_TRANSCRIPTRESULT']._serialized_end=2183 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 - _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2563 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 - _globals['_MEMORYUSAGEDATA']._serialized_start=2622 - _globals['_MEMORYUSAGEDATA']._serialized_end=2764 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 - _globals['_STATUSRESPONSE']._serialized_start=2767 - _globals['_STATUSRESPONSE']._serialized_end=2940 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 - _globals['_BACKEND']._serialized_start=2943 - _globals['_BACKEND']._serialized_end=3571 + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + _globals['_STORESKEY']._serialized_start=26 + _globals['_STORESKEY']._serialized_end=53 + _globals['_STORESVALUE']._serialized_start=55 + _globals['_STORESVALUE']._serialized_end=83 + _globals['_STORESSETOPTIONS']._serialized_start=85 + _globals['_STORESSETOPTIONS']._serialized_end=175 + _globals['_STORESDELETEOPTIONS']._serialized_start=177 + _globals['_STORESDELETEOPTIONS']._serialized_end=232 + _globals['_STORESGETOPTIONS']._serialized_start=234 + _globals['_STORESGETOPTIONS']._serialized_end=286 + _globals['_STORESGETRESULT']._serialized_start=288 + _globals['_STORESGETRESULT']._serialized_end=377 + _globals['_STORESFINDOPTIONS']._serialized_start=379 + _globals['_STORESFINDOPTIONS']._serialized_end=445 + _globals['_STORESFINDRESULT']._serialized_start=447 + _globals['_STORESFINDRESULT']._serialized_end=559 + _globals['_HEALTHMESSAGE']._serialized_start=561 + _globals['_HEALTHMESSAGE']._serialized_end=576 + 
_globals['_PREDICTOPTIONS']._serialized_start=579 + _globals['_PREDICTOPTIONS']._serialized_end=1451 + _globals['_REPLY']._serialized_start=1453 + _globals['_REPLY']._serialized_end=1477 + _globals['_MODELOPTIONS']._serialized_start=1480 + _globals['_MODELOPTIONS']._serialized_end=2552 + _globals['_RESULT']._serialized_start=2554 + _globals['_RESULT']._serialized_end=2596 + _globals['_EMBEDDINGRESULT']._serialized_start=2598 + _globals['_EMBEDDINGRESULT']._serialized_end=2635 + _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 + _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 + _globals['_TRANSCRIPTRESULT']._serialized_start=2706 + _globals['_TRANSCRIPTRESULT']._serialized_end=2784 + _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 + _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 + _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 + _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 + _globals['_TTSREQUEST']._serialized_start=3095 + _globals['_TTSREQUEST']._serialized_end=3164 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 + _globals['_MEMORYUSAGEDATA']._serialized_start=3223 + _globals['_MEMORYUSAGEDATA']._serialized_end=3365 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 + _globals['_STATUSRESPONSE']._serialized_start=3368 + _globals['_STATUSRESPONSE']._serialized_end=3541 + _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 + _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 + _globals['_MESSAGE']._serialized_start=3543 + _globals['_MESSAGE']._serialized_end=3583 + _globals['_BACKEND']._serialized_start=3586 + _globals['_BACKEND']._serialized_end=4477 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/transformers-musicgen/backend_pb2_grpc.py b/backend/python/transformers-musicgen/backend_pb2_grpc.py index 79a7677f..e06fccf3 100644 --- a/backend/python/transformers-musicgen/backend_pb2_grpc.py +++ b/backend/python/transformers-musicgen/backend_pb2_grpc.py @@ -64,6 +64,26 @@ class BackendStub(object): request_serializer=backend__pb2.HealthMessage.SerializeToString, response_deserializer=backend__pb2.StatusResponse.FromString, ) + self.StoresSet = channel.unary_unary( + '/backend.Backend/StoresSet', + request_serializer=backend__pb2.StoresSetOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresDelete = channel.unary_unary( + '/backend.Backend/StoresDelete', + request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresGet = channel.unary_unary( + '/backend.Backend/StoresGet', + request_serializer=backend__pb2.StoresGetOptions.SerializeToString, + response_deserializer=backend__pb2.StoresGetResult.FromString, + ) + self.StoresFind = channel.unary_unary( + '/backend.Backend/StoresFind', + request_serializer=backend__pb2.StoresFindOptions.SerializeToString, + response_deserializer=backend__pb2.StoresFindResult.FromString, + ) class BackendServicer(object): @@ -129,6 +149,30 @@ class BackendServicer(object): context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') + def StoresSet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not 
implemented!') + + def StoresDelete(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresGet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresFind(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + def add_BackendServicer_to_server(servicer, server): rpc_method_handlers = { @@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server): request_deserializer=backend__pb2.HealthMessage.FromString, response_serializer=backend__pb2.StatusResponse.SerializeToString, ), + 'StoresSet': grpc.unary_unary_rpc_method_handler( + servicer.StoresSet, + request_deserializer=backend__pb2.StoresSetOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresDelete': grpc.unary_unary_rpc_method_handler( + servicer.StoresDelete, + request_deserializer=backend__pb2.StoresDeleteOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresGet': grpc.unary_unary_rpc_method_handler( + servicer.StoresGet, + request_deserializer=backend__pb2.StoresGetOptions.FromString, + response_serializer=backend__pb2.StoresGetResult.SerializeToString, + ), + 'StoresFind': grpc.unary_unary_rpc_method_handler( + servicer.StoresFind, + request_deserializer=backend__pb2.StoresFindOptions.FromString, + response_serializer=backend__pb2.StoresFindResult.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( 'backend.Backend', rpc_method_handlers) @@ -361,3 +425,71 @@ class Backend(object): backend__pb2.StatusResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresSet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', + backend__pb2.StoresSetOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresDelete(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', + backend__pb2.StoresDeleteOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresGet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', + 
backend__pb2.StoresGetOptions.SerializeToString, + backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/transformers/backend_pb2.py b/backend/python/transformers/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/transformers/backend_pb2.py +++ b/backend/python/transformers/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' - _globals['_HEALTHMESSAGE']._serialized_start=26 - _globals['_HEALTHMESSAGE']._serialized_end=41 - _globals['_PREDICTOPTIONS']._serialized_start=44 - _globals['_PREDICTOPTIONS']._serialized_end=850 - _globals['_REPLY']._serialized_start=852 - _globals['_REPLY']._serialized_end=876 - _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1951 - _globals['_RESULT']._serialized_start=1953 - _globals['_RESULT']._serialized_end=1995 - _globals['_EMBEDDINGRESULT']._serialized_start=1997 - _globals['_EMBEDDINGRESULT']._serialized_end=2034 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2036 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2103 - _globals['_TRANSCRIPTRESULT']._serialized_start=2105 - _globals['_TRANSCRIPTRESULT']._serialized_end=2183 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 - _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2563 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 - _globals['_MEMORYUSAGEDATA']._serialized_start=2622 - _globals['_MEMORYUSAGEDATA']._serialized_end=2764 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 - _globals['_STATUSRESPONSE']._serialized_start=2767 - _globals['_STATUSRESPONSE']._serialized_end=2940 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 - _globals['_BACKEND']._serialized_start=2943 - _globals['_BACKEND']._serialized_end=3571 + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + _globals['_STORESKEY']._serialized_start=26 + _globals['_STORESKEY']._serialized_end=53 + _globals['_STORESVALUE']._serialized_start=55 + _globals['_STORESVALUE']._serialized_end=83 + _globals['_STORESSETOPTIONS']._serialized_start=85 + _globals['_STORESSETOPTIONS']._serialized_end=175 + _globals['_STORESDELETEOPTIONS']._serialized_start=177 + _globals['_STORESDELETEOPTIONS']._serialized_end=232 + _globals['_STORESGETOPTIONS']._serialized_start=234 + _globals['_STORESGETOPTIONS']._serialized_end=286 + _globals['_STORESGETRESULT']._serialized_start=288 + _globals['_STORESGETRESULT']._serialized_end=377 + _globals['_STORESFINDOPTIONS']._serialized_start=379 + _globals['_STORESFINDOPTIONS']._serialized_end=445 + _globals['_STORESFINDRESULT']._serialized_start=447 + _globals['_STORESFINDRESULT']._serialized_end=559 + _globals['_HEALTHMESSAGE']._serialized_start=561 + _globals['_HEALTHMESSAGE']._serialized_end=576 + 
_globals['_PREDICTOPTIONS']._serialized_start=579 + _globals['_PREDICTOPTIONS']._serialized_end=1451 + _globals['_REPLY']._serialized_start=1453 + _globals['_REPLY']._serialized_end=1477 + _globals['_MODELOPTIONS']._serialized_start=1480 + _globals['_MODELOPTIONS']._serialized_end=2552 + _globals['_RESULT']._serialized_start=2554 + _globals['_RESULT']._serialized_end=2596 + _globals['_EMBEDDINGRESULT']._serialized_start=2598 + _globals['_EMBEDDINGRESULT']._serialized_end=2635 + _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 + _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 + _globals['_TRANSCRIPTRESULT']._serialized_start=2706 + _globals['_TRANSCRIPTRESULT']._serialized_end=2784 + _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 + _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 + _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 + _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 + _globals['_TTSREQUEST']._serialized_start=3095 + _globals['_TTSREQUEST']._serialized_end=3164 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 + _globals['_MEMORYUSAGEDATA']._serialized_start=3223 + _globals['_MEMORYUSAGEDATA']._serialized_end=3365 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 + _globals['_STATUSRESPONSE']._serialized_start=3368 + _globals['_STATUSRESPONSE']._serialized_end=3541 + _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 + _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 + _globals['_MESSAGE']._serialized_start=3543 + _globals['_MESSAGE']._serialized_end=3583 + _globals['_BACKEND']._serialized_start=3586 + _globals['_BACKEND']._serialized_end=4477 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/transformers/backend_pb2_grpc.py b/backend/python/transformers/backend_pb2_grpc.py index 79a7677f..e06fccf3 100644 --- a/backend/python/transformers/backend_pb2_grpc.py +++ b/backend/python/transformers/backend_pb2_grpc.py @@ -64,6 +64,26 @@ class BackendStub(object): request_serializer=backend__pb2.HealthMessage.SerializeToString, response_deserializer=backend__pb2.StatusResponse.FromString, ) + self.StoresSet = channel.unary_unary( + '/backend.Backend/StoresSet', + request_serializer=backend__pb2.StoresSetOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresDelete = channel.unary_unary( + '/backend.Backend/StoresDelete', + request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresGet = channel.unary_unary( + '/backend.Backend/StoresGet', + request_serializer=backend__pb2.StoresGetOptions.SerializeToString, + response_deserializer=backend__pb2.StoresGetResult.FromString, + ) + self.StoresFind = channel.unary_unary( + '/backend.Backend/StoresFind', + request_serializer=backend__pb2.StoresFindOptions.SerializeToString, + response_deserializer=backend__pb2.StoresFindResult.FromString, + ) class BackendServicer(object): @@ -129,6 +149,30 @@ class BackendServicer(object): context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') + def StoresSet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresDelete(self, 
request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresGet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresFind(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + def add_BackendServicer_to_server(servicer, server): rpc_method_handlers = { @@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server): request_deserializer=backend__pb2.HealthMessage.FromString, response_serializer=backend__pb2.StatusResponse.SerializeToString, ), + 'StoresSet': grpc.unary_unary_rpc_method_handler( + servicer.StoresSet, + request_deserializer=backend__pb2.StoresSetOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresDelete': grpc.unary_unary_rpc_method_handler( + servicer.StoresDelete, + request_deserializer=backend__pb2.StoresDeleteOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresGet': grpc.unary_unary_rpc_method_handler( + servicer.StoresGet, + request_deserializer=backend__pb2.StoresGetOptions.FromString, + response_serializer=backend__pb2.StoresGetResult.SerializeToString, + ), + 'StoresFind': grpc.unary_unary_rpc_method_handler( + servicer.StoresFind, + request_deserializer=backend__pb2.StoresFindOptions.FromString, + response_serializer=backend__pb2.StoresFindResult.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( 'backend.Backend', rpc_method_handlers) @@ -361,3 +425,71 @@ class Backend(object): backend__pb2.StatusResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresSet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', + backend__pb2.StoresSetOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresDelete(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', + backend__pb2.StoresDeleteOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresGet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', + backend__pb2.StoresGetOptions.SerializeToString, + 
backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/vall-e-x/backend_pb2.py b/backend/python/vall-e-x/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/vall-e-x/backend_pb2.py +++ b/backend/python/vall-e-x/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' - _globals['_HEALTHMESSAGE']._serialized_start=26 - _globals['_HEALTHMESSAGE']._serialized_end=41 - _globals['_PREDICTOPTIONS']._serialized_start=44 - _globals['_PREDICTOPTIONS']._serialized_end=850 - _globals['_REPLY']._serialized_start=852 - _globals['_REPLY']._serialized_end=876 - _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1951 - _globals['_RESULT']._serialized_start=1953 - _globals['_RESULT']._serialized_end=1995 - _globals['_EMBEDDINGRESULT']._serialized_start=1997 - _globals['_EMBEDDINGRESULT']._serialized_end=2034 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2036 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2103 - _globals['_TRANSCRIPTRESULT']._serialized_start=2105 - _globals['_TRANSCRIPTRESULT']._serialized_end=2183 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 - _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2563 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 - _globals['_MEMORYUSAGEDATA']._serialized_start=2622 - _globals['_MEMORYUSAGEDATA']._serialized_end=2764 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 - _globals['_STATUSRESPONSE']._serialized_start=2767 - _globals['_STATUSRESPONSE']._serialized_end=2940 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 - _globals['_BACKEND']._serialized_start=2943 - _globals['_BACKEND']._serialized_end=3571 + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + _globals['_STORESKEY']._serialized_start=26 + _globals['_STORESKEY']._serialized_end=53 + _globals['_STORESVALUE']._serialized_start=55 + _globals['_STORESVALUE']._serialized_end=83 + _globals['_STORESSETOPTIONS']._serialized_start=85 + _globals['_STORESSETOPTIONS']._serialized_end=175 + _globals['_STORESDELETEOPTIONS']._serialized_start=177 + _globals['_STORESDELETEOPTIONS']._serialized_end=232 + _globals['_STORESGETOPTIONS']._serialized_start=234 + _globals['_STORESGETOPTIONS']._serialized_end=286 + _globals['_STORESGETRESULT']._serialized_start=288 + _globals['_STORESGETRESULT']._serialized_end=377 + _globals['_STORESFINDOPTIONS']._serialized_start=379 + _globals['_STORESFINDOPTIONS']._serialized_end=445 + _globals['_STORESFINDRESULT']._serialized_start=447 + _globals['_STORESFINDRESULT']._serialized_end=559 + _globals['_HEALTHMESSAGE']._serialized_start=561 + _globals['_HEALTHMESSAGE']._serialized_end=576 + 
_globals['_PREDICTOPTIONS']._serialized_start=579 + _globals['_PREDICTOPTIONS']._serialized_end=1451 + _globals['_REPLY']._serialized_start=1453 + _globals['_REPLY']._serialized_end=1477 + _globals['_MODELOPTIONS']._serialized_start=1480 + _globals['_MODELOPTIONS']._serialized_end=2552 + _globals['_RESULT']._serialized_start=2554 + _globals['_RESULT']._serialized_end=2596 + _globals['_EMBEDDINGRESULT']._serialized_start=2598 + _globals['_EMBEDDINGRESULT']._serialized_end=2635 + _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 + _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 + _globals['_TRANSCRIPTRESULT']._serialized_start=2706 + _globals['_TRANSCRIPTRESULT']._serialized_end=2784 + _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 + _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 + _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 + _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 + _globals['_TTSREQUEST']._serialized_start=3095 + _globals['_TTSREQUEST']._serialized_end=3164 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 + _globals['_MEMORYUSAGEDATA']._serialized_start=3223 + _globals['_MEMORYUSAGEDATA']._serialized_end=3365 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 + _globals['_STATUSRESPONSE']._serialized_start=3368 + _globals['_STATUSRESPONSE']._serialized_end=3541 + _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 + _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 + _globals['_MESSAGE']._serialized_start=3543 + _globals['_MESSAGE']._serialized_end=3583 + _globals['_BACKEND']._serialized_start=3586 + _globals['_BACKEND']._serialized_end=4477 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/vall-e-x/backend_pb2_grpc.py b/backend/python/vall-e-x/backend_pb2_grpc.py index 79a7677f..e06fccf3 100644 --- a/backend/python/vall-e-x/backend_pb2_grpc.py +++ b/backend/python/vall-e-x/backend_pb2_grpc.py @@ -64,6 +64,26 @@ class BackendStub(object): request_serializer=backend__pb2.HealthMessage.SerializeToString, response_deserializer=backend__pb2.StatusResponse.FromString, ) + self.StoresSet = channel.unary_unary( + '/backend.Backend/StoresSet', + request_serializer=backend__pb2.StoresSetOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresDelete = channel.unary_unary( + '/backend.Backend/StoresDelete', + request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresGet = channel.unary_unary( + '/backend.Backend/StoresGet', + request_serializer=backend__pb2.StoresGetOptions.SerializeToString, + response_deserializer=backend__pb2.StoresGetResult.FromString, + ) + self.StoresFind = channel.unary_unary( + '/backend.Backend/StoresFind', + request_serializer=backend__pb2.StoresFindOptions.SerializeToString, + response_deserializer=backend__pb2.StoresFindResult.FromString, + ) class BackendServicer(object): @@ -129,6 +149,30 @@ class BackendServicer(object): context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') + def StoresSet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresDelete(self, request, 
context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresGet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresFind(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + def add_BackendServicer_to_server(servicer, server): rpc_method_handlers = { @@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server): request_deserializer=backend__pb2.HealthMessage.FromString, response_serializer=backend__pb2.StatusResponse.SerializeToString, ), + 'StoresSet': grpc.unary_unary_rpc_method_handler( + servicer.StoresSet, + request_deserializer=backend__pb2.StoresSetOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresDelete': grpc.unary_unary_rpc_method_handler( + servicer.StoresDelete, + request_deserializer=backend__pb2.StoresDeleteOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresGet': grpc.unary_unary_rpc_method_handler( + servicer.StoresGet, + request_deserializer=backend__pb2.StoresGetOptions.FromString, + response_serializer=backend__pb2.StoresGetResult.SerializeToString, + ), + 'StoresFind': grpc.unary_unary_rpc_method_handler( + servicer.StoresFind, + request_deserializer=backend__pb2.StoresFindOptions.FromString, + response_serializer=backend__pb2.StoresFindResult.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( 'backend.Backend', rpc_method_handlers) @@ -361,3 +425,71 @@ class Backend(object): backend__pb2.StatusResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresSet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', + backend__pb2.StoresSetOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresDelete(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', + backend__pb2.StoresDeleteOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresGet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', + backend__pb2.StoresGetOptions.SerializeToString, + 
backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/vllm/backend_pb2.py b/backend/python/vllm/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/vllm/backend_pb2.py +++ b/backend/python/vllm/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
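For reference, the regenerated descriptor above now declares the vector-store messages (StoresKey, StoresValue, StoresSetOptions, StoresDeleteOptions, StoresGetOptions/StoresGetResult, StoresFindOptions/StoresFindResult) ahead of HealthMessage, plus the new Message type used by the tokenizer-template path. A minimal client-side sketch of how the new store RPCs could be exercised once a backend actually implements them (the generated servicer stubs above still raise UNIMPLEMENTED); the server address is an assumption:

import grpc
import backend_pb2
import backend_pb2_grpc

channel = grpc.insecure_channel("localhost:50051")  # assumed backend address
stub = backend_pb2_grpc.BackendStub(channel)

# Store one embedding vector together with an opaque payload.
stub.StoresSet(backend_pb2.StoresSetOptions(
    Keys=[backend_pb2.StoresKey(Floats=[0.1, 0.2, 0.3])],
    Values=[backend_pb2.StoresValue(Bytes=b"hello")],
))

# Look up the TopK stored keys most similar to a query vector.
found = stub.StoresFind(backend_pb2.StoresFindOptions(
    Key=backend_pb2.StoresKey(Floats=[0.1, 0.2, 0.3]),
    TopK=1,
))
print(found.Keys, found.Values, found.Similarities)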
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' - _globals['_HEALTHMESSAGE']._serialized_start=26 - _globals['_HEALTHMESSAGE']._serialized_end=41 - _globals['_PREDICTOPTIONS']._serialized_start=44 - _globals['_PREDICTOPTIONS']._serialized_end=850 - _globals['_REPLY']._serialized_start=852 - _globals['_REPLY']._serialized_end=876 - _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1951 - _globals['_RESULT']._serialized_start=1953 - _globals['_RESULT']._serialized_end=1995 - _globals['_EMBEDDINGRESULT']._serialized_start=1997 - _globals['_EMBEDDINGRESULT']._serialized_end=2034 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2036 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2103 - _globals['_TRANSCRIPTRESULT']._serialized_start=2105 - _globals['_TRANSCRIPTRESULT']._serialized_end=2183 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 - _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2563 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 - _globals['_MEMORYUSAGEDATA']._serialized_start=2622 - _globals['_MEMORYUSAGEDATA']._serialized_end=2764 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 - _globals['_STATUSRESPONSE']._serialized_start=2767 - _globals['_STATUSRESPONSE']._serialized_end=2940 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 - _globals['_BACKEND']._serialized_start=2943 - _globals['_BACKEND']._serialized_end=3571 + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + _globals['_STORESKEY']._serialized_start=26 + _globals['_STORESKEY']._serialized_end=53 + _globals['_STORESVALUE']._serialized_start=55 + _globals['_STORESVALUE']._serialized_end=83 + _globals['_STORESSETOPTIONS']._serialized_start=85 + _globals['_STORESSETOPTIONS']._serialized_end=175 + _globals['_STORESDELETEOPTIONS']._serialized_start=177 + _globals['_STORESDELETEOPTIONS']._serialized_end=232 + _globals['_STORESGETOPTIONS']._serialized_start=234 + _globals['_STORESGETOPTIONS']._serialized_end=286 + _globals['_STORESGETRESULT']._serialized_start=288 + _globals['_STORESGETRESULT']._serialized_end=377 + _globals['_STORESFINDOPTIONS']._serialized_start=379 + _globals['_STORESFINDOPTIONS']._serialized_end=445 + _globals['_STORESFINDRESULT']._serialized_start=447 + _globals['_STORESFINDRESULT']._serialized_end=559 + _globals['_HEALTHMESSAGE']._serialized_start=561 + _globals['_HEALTHMESSAGE']._serialized_end=576 + 
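These _serialized_start/_serialized_end values are byte offsets into the serialized file descriptor above, which is why every pre-existing message shifts once the Stores* messages are declared first. A quick way to sanity-check the regenerated module, assuming backend_pb2 is importable:

from google.protobuf import descriptor_pool

import backend_pb2  # importing the module registers backend.proto in the default pool

md = descriptor_pool.Default().FindMessageTypeByName("backend.StoresFindResult")
print([f.name for f in md.fields])  # expected: ['Keys', 'Values', 'Similarities']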
_globals['_PREDICTOPTIONS']._serialized_start=579 + _globals['_PREDICTOPTIONS']._serialized_end=1451 + _globals['_REPLY']._serialized_start=1453 + _globals['_REPLY']._serialized_end=1477 + _globals['_MODELOPTIONS']._serialized_start=1480 + _globals['_MODELOPTIONS']._serialized_end=2552 + _globals['_RESULT']._serialized_start=2554 + _globals['_RESULT']._serialized_end=2596 + _globals['_EMBEDDINGRESULT']._serialized_start=2598 + _globals['_EMBEDDINGRESULT']._serialized_end=2635 + _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 + _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 + _globals['_TRANSCRIPTRESULT']._serialized_start=2706 + _globals['_TRANSCRIPTRESULT']._serialized_end=2784 + _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 + _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 + _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 + _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 + _globals['_TTSREQUEST']._serialized_start=3095 + _globals['_TTSREQUEST']._serialized_end=3164 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 + _globals['_MEMORYUSAGEDATA']._serialized_start=3223 + _globals['_MEMORYUSAGEDATA']._serialized_end=3365 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 + _globals['_STATUSRESPONSE']._serialized_start=3368 + _globals['_STATUSRESPONSE']._serialized_end=3541 + _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 + _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 + _globals['_MESSAGE']._serialized_start=3543 + _globals['_MESSAGE']._serialized_end=3583 + _globals['_BACKEND']._serialized_start=3586 + _globals['_BACKEND']._serialized_end=4477 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/vllm/backend_pb2_grpc.py b/backend/python/vllm/backend_pb2_grpc.py index 79a7677f..e06fccf3 100644 --- a/backend/python/vllm/backend_pb2_grpc.py +++ b/backend/python/vllm/backend_pb2_grpc.py @@ -64,6 +64,26 @@ class BackendStub(object): request_serializer=backend__pb2.HealthMessage.SerializeToString, response_deserializer=backend__pb2.StatusResponse.FromString, ) + self.StoresSet = channel.unary_unary( + '/backend.Backend/StoresSet', + request_serializer=backend__pb2.StoresSetOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresDelete = channel.unary_unary( + '/backend.Backend/StoresDelete', + request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresGet = channel.unary_unary( + '/backend.Backend/StoresGet', + request_serializer=backend__pb2.StoresGetOptions.SerializeToString, + response_deserializer=backend__pb2.StoresGetResult.FromString, + ) + self.StoresFind = channel.unary_unary( + '/backend.Backend/StoresFind', + request_serializer=backend__pb2.StoresFindOptions.SerializeToString, + response_deserializer=backend__pb2.StoresFindResult.FromString, + ) class BackendServicer(object): @@ -129,6 +149,30 @@ class BackendServicer(object): context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') + def StoresSet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresDelete(self, request, context): + """Missing 
associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresGet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresFind(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + def add_BackendServicer_to_server(servicer, server): rpc_method_handlers = { @@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server): request_deserializer=backend__pb2.HealthMessage.FromString, response_serializer=backend__pb2.StatusResponse.SerializeToString, ), + 'StoresSet': grpc.unary_unary_rpc_method_handler( + servicer.StoresSet, + request_deserializer=backend__pb2.StoresSetOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresDelete': grpc.unary_unary_rpc_method_handler( + servicer.StoresDelete, + request_deserializer=backend__pb2.StoresDeleteOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresGet': grpc.unary_unary_rpc_method_handler( + servicer.StoresGet, + request_deserializer=backend__pb2.StoresGetOptions.FromString, + response_serializer=backend__pb2.StoresGetResult.SerializeToString, + ), + 'StoresFind': grpc.unary_unary_rpc_method_handler( + servicer.StoresFind, + request_deserializer=backend__pb2.StoresFindOptions.FromString, + response_serializer=backend__pb2.StoresFindResult.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( 'backend.Backend', rpc_method_handlers) @@ -361,3 +425,71 @@ class Backend(object): backend__pb2.StatusResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresSet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', + backend__pb2.StoresSetOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresDelete(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', + backend__pb2.StoresDeleteOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresGet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', + backend__pb2.StoresGetOptions.SerializeToString, + backend__pb2.StoresGetResult.FromString, + 
options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/vllm/backend_vllm.py b/backend/python/vllm/backend_vllm.py index ef5134b8..ff0f0b26 100644 --- a/backend/python/vllm/backend_vllm.py +++ b/backend/python/vllm/backend_vllm.py @@ -14,6 +14,7 @@ from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.sampling_params import SamplingParams from vllm.utils import random_uuid +from vllm.transformers_utils.tokenizer import get_tokenizer _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -71,7 +72,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): """ return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - def LoadModel(self, request, context): + async def LoadModel(self, request, context): """ Loads a language model. @@ -103,6 +104,18 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): self.llm = AsyncLLMEngine.from_engine_args(engine_args) except Exception as err: return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") + + try: + engine_model_config = await self.llm.get_model_config() + self.tokenizer = get_tokenizer( + engine_model_config.tokenizer, + tokenizer_mode=engine_model_config.tokenizer_mode, + trust_remote_code=engine_model_config.trust_remote_code, + truncation_side="left", + ) + except Exception as err: + return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") + return backend_pb2.Result(message="Model loaded successfully", success=True) async def Predict(self, request, context): @@ -161,9 +174,15 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if request.Seed != 0: sampling_params.seed = request.Seed + prompt = request.Prompt + + # If the tokenizer template is enabled and messages are provided instead of a prompt, apply the tokenizer template + if not request.Prompt and request.UseTokenizerTemplate and request.Messages: + prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True) + # Generate text request_id = random_uuid() - outputs = self.llm.generate(request.Prompt, sampling_params, request_id) + outputs = self.llm.generate(prompt, sampling_params, request_id) # Stream the results generated_text = "" diff --git a/core/backend/llm.go b/core/backend/llm.go index d5e14df0..493dc25c 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -2,6 +2,7 @@ package backend import ( "context" + "fmt" "os" "regexp" "strings" @@ -9,9 +10,11 @@ import ( "unicode/utf8" "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/gallery" "github.com/go-skynet/LocalAI/pkg/grpc" + "github.com/go-skynet/LocalAI/pkg/grpc/proto" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/utils" ) @@ -26,7 +29,7 @@ type TokenUsage struct { Completion int } -func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c 
config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) { +func ModelInference(ctx context.Context, s string, messages []schema.Message, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) { modelFile := c.Model threads := c.Threads if *threads == 0 && o.Threads != 0 { @@ -71,10 +74,30 @@ func ModelInference(ctx context.Context, s string, images []string, loader *mode return nil, err } + var protoMessages []*proto.Message + // if we are using the tokenizer template, we need to convert the messages to proto messages + // unless the prompt has already been tokenized (non-chat endpoints + functions) + if c.TemplateConfig.UseTokenizerTemplate && s == "" { + protoMessages = make([]*proto.Message, len(messages), len(messages)) + for i, message := range messages { + protoMessages[i] = &proto.Message{ + Role: message.Role, + } + switch ct := message.Content.(type) { + case string: + protoMessages[i].Content = ct + default: + return nil, fmt.Errorf("Unsupported type for schema.Message.Content for inference: %T", ct) + } + } + } + // in GRPC, the backend is supposed to answer to 1 single token if stream is not supported fn := func() (LLMResponse, error) { opts := gRPCPredictOpts(c, loader.ModelPath) opts.Prompt = s + opts.Messages = protoMessages + opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate opts.Images = images tokenUsage := TokenUsage{} diff --git a/core/config/backend_config.go b/core/config/backend_config.go index a90b1c1b..81c92d01 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -165,11 +165,12 @@ type Functions struct { } type TemplateConfig struct { - Chat string `yaml:"chat"` - ChatMessage string `yaml:"chat_message"` - Completion string `yaml:"completion"` - Edit string `yaml:"edit"` - Functions string `yaml:"function"` + Chat string `yaml:"chat"` + ChatMessage string `yaml:"chat_message"` + Completion string `yaml:"completion"` + Edit string `yaml:"edit"` + Functions string `yaml:"function"` + UseTokenizerTemplate bool `yaml:"use_tokenizer_template"` } func (c *BackendConfig) SetFunctionCallString(s string) { diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 871ae6c1..36d1142b 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -230,112 +230,154 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup var predInput string - suppressConfigSystemPrompt := false - mess := []string{} - for messageIndex, i := range input.Messages { - var content string - role := i.Role + // If we are using the tokenizer template, we don't need to process the messages + // unless we are processing functions + if !config.TemplateConfig.UseTokenizerTemplate || processFunctions { - // if function call, we might want to customize the role so we can display better that the "assistant called a json action" - // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request - if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" { - roleFn := "assistant_function_call" - r := config.Roles[roleFn] - if r != "" { - role = roleFn - } - } - r := config.Roles[role] - contentExists := i.Content != nil && i.StringContent != "" + suppressConfigSystemPrompt := false + mess := []string{} + for 
messageIndex, i := range input.Messages { + var content string + role := i.Role - fcall := i.FunctionCall - if len(i.ToolCalls) > 0 { - fcall = i.ToolCalls - } - - // First attempt to populate content via a chat message specific template - if config.TemplateConfig.ChatMessage != "" { - chatMessageData := model.ChatMessageTemplateData{ - SystemPrompt: config.SystemPrompt, - Role: r, - RoleName: role, - Content: i.StringContent, - FunctionCall: fcall, - FunctionName: i.Name, - LastMessage: messageIndex == (len(input.Messages) - 1), - Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)), - MessageIndex: messageIndex, - } - templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData) - if err != nil { - log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping") - } else { - if templatedChatMessage == "" { - log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData) - continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf + // if function call, we might want to customize the role so we can display better that the "assistant called a json action" + // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request + if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" { + roleFn := "assistant_function_call" + r := config.Roles[roleFn] + if r != "" { + role = roleFn } - log.Debug().Msgf("templated message for chat: %s", templatedChatMessage) - content = templatedChatMessage } - } + r := config.Roles[role] + contentExists := i.Content != nil && i.StringContent != "" - marshalAnyRole := func(f any) { - j, err := json.Marshal(f) - if err == nil { - if contentExists { - content += "\n" + fmt.Sprint(r, " ", string(j)) + fcall := i.FunctionCall + if len(i.ToolCalls) > 0 { + fcall = i.ToolCalls + } + + // First attempt to populate content via a chat message specific template + if config.TemplateConfig.ChatMessage != "" { + chatMessageData := model.ChatMessageTemplateData{ + SystemPrompt: config.SystemPrompt, + Role: r, + RoleName: role, + Content: i.StringContent, + FunctionCall: fcall, + FunctionName: i.Name, + LastMessage: messageIndex == (len(input.Messages) - 1), + Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)), + MessageIndex: messageIndex, + } + templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData) + if err != nil { + log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping") } else { - content = fmt.Sprint(r, " ", string(j)) + if templatedChatMessage == "" { + log.Warn().Msgf("template \"%s\" produced blank output for %+v. 
Skipping!", config.TemplateConfig.ChatMessage, chatMessageData) + continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf + } + log.Debug().Msgf("templated message for chat: %s", templatedChatMessage) + content = templatedChatMessage } } - } - marshalAny := func(f any) { - j, err := json.Marshal(f) - if err == nil { - if contentExists { - content += "\n" + string(j) + + marshalAnyRole := func(f any) { + j, err := json.Marshal(f) + if err == nil { + if contentExists { + content += "\n" + fmt.Sprint(r, " ", string(j)) + } else { + content = fmt.Sprint(r, " ", string(j)) + } + } + } + marshalAny := func(f any) { + j, err := json.Marshal(f) + if err == nil { + if contentExists { + content += "\n" + string(j) + } else { + content = string(j) + } + } + } + // If this model doesn't have such a template, or if that template fails to return a value, template at the message level. + if content == "" { + if r != "" { + if contentExists { + content = fmt.Sprint(r, i.StringContent) + } + + if i.FunctionCall != nil { + marshalAnyRole(i.FunctionCall) + } + if i.ToolCalls != nil { + marshalAnyRole(i.ToolCalls) + } } else { - content = string(j) + if contentExists { + content = fmt.Sprint(i.StringContent) + } + if i.FunctionCall != nil { + marshalAny(i.FunctionCall) + } + if i.ToolCalls != nil { + marshalAny(i.ToolCalls) + } + } + // Special Handling: System. We care if it was printed at all, not the r branch, so check seperately + if contentExists && role == "system" { + suppressConfigSystemPrompt = true } } - } - // If this model doesn't have such a template, or if that template fails to return a value, template at the message level. - if content == "" { - if r != "" { - if contentExists { - content = fmt.Sprint(r, i.StringContent) - } - if i.FunctionCall != nil { - marshalAnyRole(i.FunctionCall) - } - if i.ToolCalls != nil { - marshalAnyRole(i.ToolCalls) - } + mess = append(mess, content) + } + + predInput = strings.Join(mess, "\n") + log.Debug().Msgf("Prompt (before templating): %s", predInput) + + templateFile := "" + + // A model can have a "file.bin.tmpl" file associated with a prompt template prefix + if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { + templateFile = config.Model + } + + if config.TemplateConfig.Chat != "" && !processFunctions { + templateFile = config.TemplateConfig.Chat + } + + if config.TemplateConfig.Functions != "" && processFunctions { + templateFile = config.TemplateConfig.Functions + } + + if templateFile != "" { + templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{ + SystemPrompt: config.SystemPrompt, + SuppressSystemPrompt: suppressConfigSystemPrompt, + Input: predInput, + Functions: funcs, + }) + if err == nil { + predInput = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", predInput) } else { - if contentExists { - content = fmt.Sprint(i.StringContent) - } - if i.FunctionCall != nil { - marshalAny(i.FunctionCall) - } - if i.ToolCalls != nil { - marshalAny(i.ToolCalls) - } - } - // Special Handling: System. 
We care if it was printed at all, not the r branch, so check seperately - if contentExists && role == "system" { - suppressConfigSystemPrompt = true + log.Debug().Msgf("Template failed loading: %s", err.Error()) } } - mess = append(mess, content) + log.Debug().Msgf("Prompt (after templating): %s", predInput) + if processFunctions { + log.Debug().Msgf("Grammar: %+v", config.Grammar) + } } - predInput = strings.Join(mess, "\n") - log.Debug().Msgf("Prompt (before templating): %s", predInput) + switch { + case toStream: - if toStream { log.Debug().Msgf("Stream request received") c.Context().SetContentType("text/event-stream") //c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8) @@ -343,45 +385,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup c.Set("Cache-Control", "no-cache") c.Set("Connection", "keep-alive") c.Set("Transfer-Encoding", "chunked") - } - templateFile := "" - - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { - templateFile = config.Model - } - - if config.TemplateConfig.Chat != "" && !processFunctions { - templateFile = config.TemplateConfig.Chat - } - - if config.TemplateConfig.Functions != "" && processFunctions { - templateFile = config.TemplateConfig.Functions - } - - if templateFile != "" { - templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{ - SystemPrompt: config.SystemPrompt, - SuppressSystemPrompt: suppressConfigSystemPrompt, - Input: predInput, - Functions: funcs, - }) - if err == nil { - predInput = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", predInput) - } else { - log.Debug().Msgf("Template failed loading: %s", err.Error()) - } - } - - log.Debug().Msgf("Prompt (after templating): %s", predInput) - if processFunctions { - log.Debug().Msgf("Grammar: %+v", config.Grammar) - } - - switch { - case toStream: responses := make(chan schema.OpenAIResponse) if !processFunctions { @@ -563,7 +567,7 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m images = append(images, m.StringImages...) } - predFunc, err := backend.ModelInference(input.Context, prompt, images, ml, *config, o, nil) + predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, ml, *config, o, nil) if err != nil { log.Error().Err(err).Msg("model inference failed") return "", err diff --git a/core/http/endpoints/openai/inference.go b/core/http/endpoints/openai/inference.go index 5d97d21d..06e784b7 100644 --- a/core/http/endpoints/openai/inference.go +++ b/core/http/endpoints/openai/inference.go @@ -29,7 +29,7 @@ func ComputeChoices( } // get the model function to call for the result - predFunc, err := backend.ModelInference(req.Context, predInput, images, loader, *config, o, tokenCallback) + predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, loader, *config, o, tokenCallback) if err != nil { return result, backend.TokenUsage{}, err } diff --git a/pkg/grpc/proto/backend.pb.go b/pkg/grpc/proto/backend.pb.go index cc687577..e9afe196 100644 --- a/pkg/grpc/proto/backend.pb.go +++ b/pkg/grpc/proto/backend.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
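Stepping back before the regenerated Go bindings: the UseTokenizerTemplate flag and Messages field threaded through chat.go and llm.go above let a backend render the prompt with the model tokenizer's own chat template (as backend_vllm.py now does via apply_chat_template) rather than with LocalAI's Go prompt templates, toggled per model by the use_tokenizer_template template option. A standalone sketch of that rendering step; the model name is an assumption:

from transformers import AutoTokenizer

# Assumed model; any Hugging Face model that ships a chat template behaves the same way.
tok = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
messages = [{"role": "user", "content": "Hello!"}]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)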
// versions: -// protoc-gen-go v1.31.0 -// protoc v4.23.4 +// protoc-gen-go v1.26.0 +// protoc v5.26.1 // source: backend.proto package proto @@ -532,47 +532,49 @@ type PredictOptions struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Prompt string `protobuf:"bytes,1,opt,name=Prompt,proto3" json:"Prompt,omitempty"` - Seed int32 `protobuf:"varint,2,opt,name=Seed,proto3" json:"Seed,omitempty"` - Threads int32 `protobuf:"varint,3,opt,name=Threads,proto3" json:"Threads,omitempty"` - Tokens int32 `protobuf:"varint,4,opt,name=Tokens,proto3" json:"Tokens,omitempty"` - TopK int32 `protobuf:"varint,5,opt,name=TopK,proto3" json:"TopK,omitempty"` - Repeat int32 `protobuf:"varint,6,opt,name=Repeat,proto3" json:"Repeat,omitempty"` - Batch int32 `protobuf:"varint,7,opt,name=Batch,proto3" json:"Batch,omitempty"` - NKeep int32 `protobuf:"varint,8,opt,name=NKeep,proto3" json:"NKeep,omitempty"` - Temperature float32 `protobuf:"fixed32,9,opt,name=Temperature,proto3" json:"Temperature,omitempty"` - Penalty float32 `protobuf:"fixed32,10,opt,name=Penalty,proto3" json:"Penalty,omitempty"` - F16KV bool `protobuf:"varint,11,opt,name=F16KV,proto3" json:"F16KV,omitempty"` - DebugMode bool `protobuf:"varint,12,opt,name=DebugMode,proto3" json:"DebugMode,omitempty"` - StopPrompts []string `protobuf:"bytes,13,rep,name=StopPrompts,proto3" json:"StopPrompts,omitempty"` - IgnoreEOS bool `protobuf:"varint,14,opt,name=IgnoreEOS,proto3" json:"IgnoreEOS,omitempty"` - TailFreeSamplingZ float32 `protobuf:"fixed32,15,opt,name=TailFreeSamplingZ,proto3" json:"TailFreeSamplingZ,omitempty"` - TypicalP float32 `protobuf:"fixed32,16,opt,name=TypicalP,proto3" json:"TypicalP,omitempty"` - FrequencyPenalty float32 `protobuf:"fixed32,17,opt,name=FrequencyPenalty,proto3" json:"FrequencyPenalty,omitempty"` - PresencePenalty float32 `protobuf:"fixed32,18,opt,name=PresencePenalty,proto3" json:"PresencePenalty,omitempty"` - Mirostat int32 `protobuf:"varint,19,opt,name=Mirostat,proto3" json:"Mirostat,omitempty"` - MirostatETA float32 `protobuf:"fixed32,20,opt,name=MirostatETA,proto3" json:"MirostatETA,omitempty"` - MirostatTAU float32 `protobuf:"fixed32,21,opt,name=MirostatTAU,proto3" json:"MirostatTAU,omitempty"` - PenalizeNL bool `protobuf:"varint,22,opt,name=PenalizeNL,proto3" json:"PenalizeNL,omitempty"` - LogitBias string `protobuf:"bytes,23,opt,name=LogitBias,proto3" json:"LogitBias,omitempty"` - MLock bool `protobuf:"varint,25,opt,name=MLock,proto3" json:"MLock,omitempty"` - MMap bool `protobuf:"varint,26,opt,name=MMap,proto3" json:"MMap,omitempty"` - PromptCacheAll bool `protobuf:"varint,27,opt,name=PromptCacheAll,proto3" json:"PromptCacheAll,omitempty"` - PromptCacheRO bool `protobuf:"varint,28,opt,name=PromptCacheRO,proto3" json:"PromptCacheRO,omitempty"` - Grammar string `protobuf:"bytes,29,opt,name=Grammar,proto3" json:"Grammar,omitempty"` - MainGPU string `protobuf:"bytes,30,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"` - TensorSplit string `protobuf:"bytes,31,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"` - TopP float32 `protobuf:"fixed32,32,opt,name=TopP,proto3" json:"TopP,omitempty"` - PromptCachePath string `protobuf:"bytes,33,opt,name=PromptCachePath,proto3" json:"PromptCachePath,omitempty"` - Debug bool `protobuf:"varint,34,opt,name=Debug,proto3" json:"Debug,omitempty"` - EmbeddingTokens []int32 `protobuf:"varint,35,rep,packed,name=EmbeddingTokens,proto3" json:"EmbeddingTokens,omitempty"` - Embeddings string `protobuf:"bytes,36,opt,name=Embeddings,proto3" 
json:"Embeddings,omitempty"` - RopeFreqBase float32 `protobuf:"fixed32,37,opt,name=RopeFreqBase,proto3" json:"RopeFreqBase,omitempty"` - RopeFreqScale float32 `protobuf:"fixed32,38,opt,name=RopeFreqScale,proto3" json:"RopeFreqScale,omitempty"` - NegativePromptScale float32 `protobuf:"fixed32,39,opt,name=NegativePromptScale,proto3" json:"NegativePromptScale,omitempty"` - NegativePrompt string `protobuf:"bytes,40,opt,name=NegativePrompt,proto3" json:"NegativePrompt,omitempty"` - NDraft int32 `protobuf:"varint,41,opt,name=NDraft,proto3" json:"NDraft,omitempty"` - Images []string `protobuf:"bytes,42,rep,name=Images,proto3" json:"Images,omitempty"` + Prompt string `protobuf:"bytes,1,opt,name=Prompt,proto3" json:"Prompt,omitempty"` + Seed int32 `protobuf:"varint,2,opt,name=Seed,proto3" json:"Seed,omitempty"` + Threads int32 `protobuf:"varint,3,opt,name=Threads,proto3" json:"Threads,omitempty"` + Tokens int32 `protobuf:"varint,4,opt,name=Tokens,proto3" json:"Tokens,omitempty"` + TopK int32 `protobuf:"varint,5,opt,name=TopK,proto3" json:"TopK,omitempty"` + Repeat int32 `protobuf:"varint,6,opt,name=Repeat,proto3" json:"Repeat,omitempty"` + Batch int32 `protobuf:"varint,7,opt,name=Batch,proto3" json:"Batch,omitempty"` + NKeep int32 `protobuf:"varint,8,opt,name=NKeep,proto3" json:"NKeep,omitempty"` + Temperature float32 `protobuf:"fixed32,9,opt,name=Temperature,proto3" json:"Temperature,omitempty"` + Penalty float32 `protobuf:"fixed32,10,opt,name=Penalty,proto3" json:"Penalty,omitempty"` + F16KV bool `protobuf:"varint,11,opt,name=F16KV,proto3" json:"F16KV,omitempty"` + DebugMode bool `protobuf:"varint,12,opt,name=DebugMode,proto3" json:"DebugMode,omitempty"` + StopPrompts []string `protobuf:"bytes,13,rep,name=StopPrompts,proto3" json:"StopPrompts,omitempty"` + IgnoreEOS bool `protobuf:"varint,14,opt,name=IgnoreEOS,proto3" json:"IgnoreEOS,omitempty"` + TailFreeSamplingZ float32 `protobuf:"fixed32,15,opt,name=TailFreeSamplingZ,proto3" json:"TailFreeSamplingZ,omitempty"` + TypicalP float32 `protobuf:"fixed32,16,opt,name=TypicalP,proto3" json:"TypicalP,omitempty"` + FrequencyPenalty float32 `protobuf:"fixed32,17,opt,name=FrequencyPenalty,proto3" json:"FrequencyPenalty,omitempty"` + PresencePenalty float32 `protobuf:"fixed32,18,opt,name=PresencePenalty,proto3" json:"PresencePenalty,omitempty"` + Mirostat int32 `protobuf:"varint,19,opt,name=Mirostat,proto3" json:"Mirostat,omitempty"` + MirostatETA float32 `protobuf:"fixed32,20,opt,name=MirostatETA,proto3" json:"MirostatETA,omitempty"` + MirostatTAU float32 `protobuf:"fixed32,21,opt,name=MirostatTAU,proto3" json:"MirostatTAU,omitempty"` + PenalizeNL bool `protobuf:"varint,22,opt,name=PenalizeNL,proto3" json:"PenalizeNL,omitempty"` + LogitBias string `protobuf:"bytes,23,opt,name=LogitBias,proto3" json:"LogitBias,omitempty"` + MLock bool `protobuf:"varint,25,opt,name=MLock,proto3" json:"MLock,omitempty"` + MMap bool `protobuf:"varint,26,opt,name=MMap,proto3" json:"MMap,omitempty"` + PromptCacheAll bool `protobuf:"varint,27,opt,name=PromptCacheAll,proto3" json:"PromptCacheAll,omitempty"` + PromptCacheRO bool `protobuf:"varint,28,opt,name=PromptCacheRO,proto3" json:"PromptCacheRO,omitempty"` + Grammar string `protobuf:"bytes,29,opt,name=Grammar,proto3" json:"Grammar,omitempty"` + MainGPU string `protobuf:"bytes,30,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"` + TensorSplit string `protobuf:"bytes,31,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"` + TopP float32 `protobuf:"fixed32,32,opt,name=TopP,proto3" json:"TopP,omitempty"` + PromptCachePath 
string `protobuf:"bytes,33,opt,name=PromptCachePath,proto3" json:"PromptCachePath,omitempty"` + Debug bool `protobuf:"varint,34,opt,name=Debug,proto3" json:"Debug,omitempty"` + EmbeddingTokens []int32 `protobuf:"varint,35,rep,packed,name=EmbeddingTokens,proto3" json:"EmbeddingTokens,omitempty"` + Embeddings string `protobuf:"bytes,36,opt,name=Embeddings,proto3" json:"Embeddings,omitempty"` + RopeFreqBase float32 `protobuf:"fixed32,37,opt,name=RopeFreqBase,proto3" json:"RopeFreqBase,omitempty"` + RopeFreqScale float32 `protobuf:"fixed32,38,opt,name=RopeFreqScale,proto3" json:"RopeFreqScale,omitempty"` + NegativePromptScale float32 `protobuf:"fixed32,39,opt,name=NegativePromptScale,proto3" json:"NegativePromptScale,omitempty"` + NegativePrompt string `protobuf:"bytes,40,opt,name=NegativePrompt,proto3" json:"NegativePrompt,omitempty"` + NDraft int32 `protobuf:"varint,41,opt,name=NDraft,proto3" json:"NDraft,omitempty"` + Images []string `protobuf:"bytes,42,rep,name=Images,proto3" json:"Images,omitempty"` + UseTokenizerTemplate bool `protobuf:"varint,43,opt,name=UseTokenizerTemplate,proto3" json:"UseTokenizerTemplate,omitempty"` + Messages []*Message `protobuf:"bytes,44,rep,name=Messages,proto3" json:"Messages,omitempty"` } func (x *PredictOptions) Reset() { @@ -894,6 +896,20 @@ func (x *PredictOptions) GetImages() []string { return nil } +func (x *PredictOptions) GetUseTokenizerTemplate() bool { + if x != nil { + return x.UseTokenizerTemplate + } + return false +} + +func (x *PredictOptions) GetMessages() []*Message { + if x != nil { + return x.Messages + } + return nil +} + // The response message containing the result type Reply struct { state protoimpl.MessageState @@ -2080,6 +2096,61 @@ func (x *StatusResponse) GetMemory() *MemoryUsageData { return nil } +type Message struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Role string `protobuf:"bytes,1,opt,name=role,proto3" json:"role,omitempty"` + Content string `protobuf:"bytes,2,opt,name=content,proto3" json:"content,omitempty"` +} + +func (x *Message) Reset() { + *x = Message{} + if protoimpl.UnsafeEnabled { + mi := &file_backend_proto_msgTypes[22] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Message) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Message) ProtoMessage() {} + +func (x *Message) ProtoReflect() protoreflect.Message { + mi := &file_backend_proto_msgTypes[22] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Message.ProtoReflect.Descriptor instead. 
+func (*Message) Descriptor() ([]byte, []int) { + return file_backend_proto_rawDescGZIP(), []int{22} +} + +func (x *Message) GetRole() string { + if x != nil { + return x.Role + } + return "" +} + +func (x *Message) GetContent() string { + if x != nil { + return x.Content + } + return "" +} + var File_backend_proto protoreflect.FileDescriptor var file_backend_proto_rawDesc = []byte{ @@ -2125,7 +2196,7 @@ var file_backend_proto_rawDesc = []byte{ 0x6d, 0x69, 0x6c, 0x61, 0x72, 0x69, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0c, 0x53, 0x69, 0x6d, 0x69, 0x6c, 0x61, 0x72, 0x69, 0x74, 0x69, 0x65, 0x73, 0x22, 0x0f, 0x0a, 0x0d, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, - 0xf4, 0x09, 0x0a, 0x0e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0xd6, 0x0a, 0x0a, 0x0e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x18, @@ -2204,257 +2275,267 @@ var file_backend_proto_rawDesc = []byte{ 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, 0x18, 0x29, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x18, 0x2a, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, - 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x12, - 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, - 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xa2, 0x0d, 0x0a, 0x0c, 0x4d, 0x6f, - 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x6f, - 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, - 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, 0x65, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, - 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, - 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68, - 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68, 0x12, 0x1c, - 0x0a, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x05, 0x20, 0x01, 0x28, - 0x08, 0x52, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12, 0x14, 0x0a, 0x05, - 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, 0x4c, 0x6f, - 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x07, 0x20, 0x01, 0x28, 0x08, - 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12, 0x1c, 0x0a, 0x09, 0x56, 0x6f, 0x63, 0x61, 0x62, 0x4f, - 0x6e, 0x6c, 0x79, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x56, 0x6f, 0x63, 0x61, 0x62, - 0x4f, 0x6e, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, 0x18, - 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, 0x12, 0x1e, - 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x0a, 0x20, 0x01, - 0x28, 0x08, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x12, 0x12, - 0x0a, 0x04, 
0x4e, 0x55, 0x4d, 0x41, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4e, 0x55, - 0x4d, 0x41, 0x12, 0x1e, 0x0a, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65, 0x72, 0x73, - 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65, - 0x72, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x18, 0x0d, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12, 0x20, 0x0a, 0x0b, - 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x12, 0x18, - 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x2c, 0x0a, 0x11, 0x4c, 0x69, 0x62, 0x72, - 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x18, 0x10, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x11, 0x4c, 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, - 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, - 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x52, 0x6f, - 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d, 0x52, 0x6f, - 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x12, 0x20, 0x01, 0x28, - 0x02, 0x52, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, - 0x12, 0x1e, 0x0a, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x18, 0x13, - 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, - 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, - 0x4e, 0x47, 0x51, 0x41, 0x12, 0x1c, 0x0a, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69, 0x6c, - 0x65, 0x18, 0x15, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69, - 0x6c, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x18, 0x16, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x55, 0x73, - 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x55, - 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x4d, 0x6f, 0x64, 0x65, - 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x18, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x2a, - 0x0a, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, - 0x65, 0x72, 0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73, - 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x50, 0x69, - 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0c, 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x24, - 0x0a, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x18, - 0x1b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, - 0x54, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x43, 0x55, 0x44, 0x41, 0x18, 0x1c, 0x20, 0x01, - 0x28, 0x08, 0x52, 0x04, 0x43, 0x55, 0x44, 0x41, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x46, 0x47, 0x53, - 0x63, 0x61, 0x6c, 0x65, 0x18, 0x1d, 
0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x43, 0x46, 0x47, 0x53, - 0x63, 0x61, 0x6c, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, 0x18, - 0x1e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, 0x12, 0x1c, - 0x0a, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x1f, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x24, 0x0a, 0x0d, - 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x18, 0x20, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64, - 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x21, - 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x12, 0x1e, - 0x0a, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x18, 0x30, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x12, 0x1c, - 0x0a, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x18, 0x22, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, - 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x18, 0x23, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, - 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4c, 0x6f, 0x72, 0x61, - 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x4c, - 0x6f, 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x4c, 0x6f, - 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x2a, 0x20, 0x01, 0x28, 0x02, 0x52, 0x09, 0x4c, - 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x4e, 0x6f, 0x4d, 0x75, - 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x18, 0x25, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x4e, 0x6f, 0x4d, - 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x12, 0x1e, 0x0a, 0x0a, 0x44, 0x72, 0x61, 0x66, 0x74, 0x4d, - 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x27, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x44, 0x72, 0x61, 0x66, - 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x1c, 0x0a, 0x09, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x50, - 0x61, 0x74, 0x68, 0x18, 0x26, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x41, 0x75, 0x64, 0x69, 0x6f, - 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61, - 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x51, 0x75, 0x61, 0x6e, - 0x74, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x32, 0x0a, 0x14, 0x47, 0x50, 0x55, 0x4d, - 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x18, 0x32, 0x20, 0x01, 0x28, 0x02, 0x52, 0x14, 0x47, 0x50, 0x55, 0x4d, 0x65, 0x6d, 0x6f, 0x72, - 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x28, 0x0a, 0x0f, - 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x18, - 0x33, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, 0x6d, 0x6f, - 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x12, 0x22, 0x0a, 0x0c, 0x45, 0x6e, 0x66, 0x6f, 0x72, 0x63, - 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x18, 0x34, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x45, 0x6e, - 0x66, 0x6f, 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x53, 0x77, - 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x18, 0x35, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x53, - 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x12, 0x20, 
0x0a, 0x0b, 0x4d, 0x61, 0x78, 0x4d, - 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x18, 0x36, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x4d, - 0x61, 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x4d, 0x4d, - 0x50, 0x72, 0x6f, 0x6a, 0x18, 0x29, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x4d, 0x4d, 0x50, 0x72, - 0x6f, 0x6a, 0x12, 0x20, 0x0a, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, 0x6c, 0x69, 0x6e, - 0x67, 0x18, 0x2b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, - 0x6c, 0x69, 0x6e, 0x67, 0x12, 0x24, 0x0a, 0x0d, 0x59, 0x61, 0x72, 0x6e, 0x45, 0x78, 0x74, 0x46, - 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2c, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x59, 0x61, 0x72, - 0x6e, 0x45, 0x78, 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x26, 0x0a, 0x0e, 0x59, 0x61, - 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2d, 0x20, 0x01, - 0x28, 0x02, 0x52, 0x0e, 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74, - 0x6f, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x46, 0x61, - 0x73, 0x74, 0x18, 0x2e, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, - 0x74, 0x61, 0x46, 0x61, 0x73, 0x74, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, - 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x18, 0x2f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, - 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x79, - 0x70, 0x65, 0x18, 0x31, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x54, 0x79, 0x70, 0x65, 0x22, 0x3c, - 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, - 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, - 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, 0x0a, 0x0f, - 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, - 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x01, 0x20, - 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x22, - 0x5b, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, - 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, - 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x04, 0x20, - 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e, 0x0a, 0x10, - 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, - 0x12, 0x36, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, - 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x08, - 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x22, 0x77, 0x0a, 0x11, - 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 
0x65, 0x6e, - 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x69, - 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, - 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, - 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x16, 0x0a, - 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, - 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xbe, 0x02, 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, - 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x16, - 0x0a, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, - 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x12, 0x12, 0x0a, 0x04, - 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f, 0x64, 0x65, - 0x12, 0x12, 0x0a, 0x04, 0x73, 0x74, 0x65, 0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, - 0x73, 0x74, 0x65, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x65, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x04, 0x73, 0x65, 0x65, 0x64, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f, 0x73, 0x69, - 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, - 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, - 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x6e, 0x65, 0x67, 0x61, - 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, - 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, - 0x73, 0x72, 0x63, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x72, 0x63, 0x12, 0x2a, - 0x0a, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, - 0x72, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, - 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, - 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, - 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x22, 0x5e, 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65, - 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x10, - 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, - 0x12, 0x14, 0x0a, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x22, 0x46, 0x0a, 0x14, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, - 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, - 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, - 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, - 0x18, 0x02, 
0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xac, - 0x01, 0x0a, 0x0f, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, - 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x45, 0x0a, 0x09, 0x62, 0x72, 0x65, 0x61, - 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x62, 0x61, - 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, - 0x65, 0x44, 0x61, 0x74, 0x61, 0x2e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, - 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x1a, - 0x3c, 0x0a, 0x0e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e, 0x74, 0x72, - 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, - 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x04, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xbc, 0x01, - 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, - 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, - 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x30, 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, - 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x52, - 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x22, 0x43, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, - 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, 0x5a, 0x45, - 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x01, 0x12, 0x09, 0x0a, - 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, 0x10, 0x02, 0x12, 0x12, 0x0a, 0x05, 0x45, 0x52, 0x52, 0x4f, - 0x52, 0x10, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x32, 0xfb, 0x06, 0x0a, - 0x07, 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, - 0x74, 0x68, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, - 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, - 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x07, - 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, + 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x12, 0x32, 0x0a, 0x14, 0x55, 0x73, 0x65, 0x54, 0x6f, 0x6b, + 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x18, 0x2b, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x14, 0x55, 0x73, 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, + 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x2c, 0x0a, 0x08, 0x4d, 0x65, + 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x18, 0x2c, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x10, 0x2e, 0x62, + 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x52, 0x08, + 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, + 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 
0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0c, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xa2, 0x0d, 0x0a, 0x0c, + 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, + 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, + 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, + 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, + 0x53, 0x69, 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x42, 0x61, 0x74, + 0x63, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68, + 0x12, 0x1c, 0x0a, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x05, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12, 0x14, + 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, + 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x07, 0x20, 0x01, + 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12, 0x1c, 0x0a, 0x09, 0x56, 0x6f, 0x63, 0x61, + 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x56, 0x6f, 0x63, + 0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, + 0x4d, 0x18, 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, + 0x12, 0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x0a, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, + 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x55, 0x4d, 0x41, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, + 0x4e, 0x55, 0x4d, 0x41, 0x12, 0x1e, 0x0a, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65, + 0x72, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, + 0x79, 0x65, 0x72, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x18, + 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12, 0x20, + 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, + 0x12, 0x18, 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x0f, 0x20, 0x01, 0x28, + 0x05, 0x52, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x2c, 0x0a, 0x11, 0x4c, 0x69, + 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x18, + 0x10, 0x20, 0x01, 0x28, 0x09, 0x52, 0x11, 0x4c, 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, + 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65, + 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, + 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d, + 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x12, 0x20, + 0x01, 0x28, 0x02, 0x52, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, + 0x6c, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, + 0x18, 0x13, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a, 0x52, 0x4d, 
0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, + 0x70, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x12, 0x1c, 0x0a, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, + 0x69, 0x6c, 0x65, 0x18, 0x15, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, + 0x46, 0x69, 0x6c, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x18, 0x16, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x12, 0x1c, 0x0a, 0x09, + 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52, + 0x09, 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x4d, 0x6f, + 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x18, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, + 0x12, 0x2a, 0x0a, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, + 0x69, 0x7a, 0x65, 0x72, 0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x55, 0x73, 0x65, 0x46, + 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x22, 0x0a, 0x0c, + 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x18, 0x1a, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x0c, 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, + 0x12, 0x24, 0x0a, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, 0x54, 0x79, 0x70, + 0x65, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, + 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x43, 0x55, 0x44, 0x41, 0x18, 0x1c, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x43, 0x55, 0x44, 0x41, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x46, + 0x47, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x43, 0x46, + 0x47, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, + 0x47, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, + 0x12, 0x1c, 0x0a, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x1f, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x24, + 0x0a, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x18, + 0x20, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, + 0x6c, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, + 0x18, 0x21, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, + 0x12, 0x1e, 0x0a, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x18, 0x30, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, + 0x12, 0x1c, 0x0a, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x18, 0x22, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x1a, + 0x0a, 0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x18, 0x23, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4c, 0x6f, + 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0b, 0x4c, 0x6f, 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, + 0x4c, 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x2a, 0x20, 0x01, 0x28, 
0x02, 0x52, + 0x09, 0x4c, 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x4e, 0x6f, + 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x18, 0x25, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x4e, + 0x6f, 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x12, 0x1e, 0x0a, 0x0a, 0x44, 0x72, 0x61, 0x66, + 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x27, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x44, 0x72, + 0x61, 0x66, 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x1c, 0x0a, 0x09, 0x41, 0x75, 0x64, 0x69, + 0x6f, 0x50, 0x61, 0x74, 0x68, 0x18, 0x26, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x41, 0x75, 0x64, + 0x69, 0x6f, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, + 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x51, 0x75, + 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x32, 0x0a, 0x14, 0x47, 0x50, + 0x55, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x18, 0x32, 0x20, 0x01, 0x28, 0x02, 0x52, 0x14, 0x47, 0x50, 0x55, 0x4d, 0x65, 0x6d, + 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x28, + 0x0a, 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, + 0x65, 0x18, 0x33, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, + 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x12, 0x22, 0x0a, 0x0c, 0x45, 0x6e, 0x66, 0x6f, + 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x18, 0x34, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, + 0x45, 0x6e, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, + 0x53, 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x18, 0x35, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x09, 0x53, 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x61, + 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x18, 0x36, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x0b, 0x4d, 0x61, 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x12, 0x16, 0x0a, 0x06, + 0x4d, 0x4d, 0x50, 0x72, 0x6f, 0x6a, 0x18, 0x29, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x4d, 0x4d, + 0x50, 0x72, 0x6f, 0x6a, 0x12, 0x20, 0x0a, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, 0x6c, + 0x69, 0x6e, 0x67, 0x18, 0x2b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, + 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x12, 0x24, 0x0a, 0x0d, 0x59, 0x61, 0x72, 0x6e, 0x45, 0x78, + 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2c, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x59, + 0x61, 0x72, 0x6e, 0x45, 0x78, 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x26, 0x0a, 0x0e, + 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2d, + 0x20, 0x01, 0x28, 0x02, 0x52, 0x0e, 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, + 0x63, 0x74, 0x6f, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, + 0x46, 0x61, 0x73, 0x74, 0x18, 0x2e, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e, + 0x42, 0x65, 0x74, 0x61, 0x46, 0x61, 0x73, 0x74, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, + 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x18, 0x2f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, + 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x12, 0x12, 0x0a, 0x04, + 0x54, 0x79, 0x70, 0x65, 0x18, 0x31, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x54, 0x79, 0x70, 0x65, + 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, + 0x73, 0x73, 
0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, + 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, + 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, + 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, + 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, + 0x73, 0x22, 0x5b, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67, + 0x75, 0x61, 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, + 0x75, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, + 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e, + 0x0a, 0x10, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, + 0x6c, 0x74, 0x12, 0x36, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, + 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, + 0x52, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, + 0x78, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x22, 0x77, + 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, + 0x65, 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x03, 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, + 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, + 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x05, 0x52, + 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xbe, 0x02, 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65, + 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x12, 0x16, 0x0a, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74, + 0x68, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x12, 0x12, + 0x0a, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f, + 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x74, 0x65, 0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x04, 0x73, 0x74, 0x65, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x65, 0x65, 0x64, 0x18, 0x05, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x65, 0x65, 0x64, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f, + 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, + 0x6d, 0x70, 0x74, 0x12, 0x27, 0x0a, 
0x0f, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, + 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x6e, 0x65, + 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03, + 0x64, 0x73, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x10, + 0x0a, 0x03, 0x73, 0x72, 0x63, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x72, 0x63, + 0x12, 0x2a, 0x0a, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, + 0x74, 0x65, 0x72, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x10, 0x45, 0x6e, 0x61, 0x62, + 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x12, 0x1a, 0x0a, 0x08, + 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, + 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x22, 0x5e, 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, + 0x64, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, + 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, + 0x73, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x22, 0x46, 0x0a, 0x14, 0x54, 0x6f, 0x6b, 0x65, + 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, + 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, + 0x22, 0xac, 0x01, 0x0a, 0x0f, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, + 0x44, 0x61, 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x45, 0x0a, 0x09, 0x62, 0x72, + 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, + 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, + 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x2e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, + 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, + 0x6e, 0x1a, 0x3c, 0x0a, 0x0e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e, + 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, + 0xbc, 0x01, 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0e, 0x32, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, + 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x30, 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, + 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 
0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, + 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, + 0x61, 0x52, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x22, 0x43, 0x0a, 0x05, 0x53, 0x74, 0x61, + 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, + 0x5a, 0x45, 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x01, 0x12, + 0x09, 0x0a, 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, 0x10, 0x02, 0x12, 0x12, 0x0a, 0x05, 0x45, 0x52, + 0x52, 0x4f, 0x52, 0x10, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x22, 0x37, + 0x0a, 0x07, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x6f, 0x6c, + 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x72, 0x6f, 0x6c, 0x65, 0x12, 0x18, 0x0a, + 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, + 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x32, 0xfb, 0x06, 0x0a, 0x07, 0x42, 0x61, 0x63, 0x6b, + 0x65, 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x12, 0x16, 0x2e, + 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, + 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, + 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, + 0x63, 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, + 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, + 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x35, 0x0a, + 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x15, 0x2e, 0x62, 0x61, 0x63, + 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, + 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, + 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, + 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, + 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, + 0x30, 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x12, + 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, + 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, + 0x6e, 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, + 0x6c, 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, + 0x49, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, + 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, + 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, 0x0a, 0x12, 0x41, 0x75, 0x64, 0x69, 0x6f, + 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x2e, + 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, + 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, 0x2e, 0x62, 0x61, 
0x63, 0x6b, + 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, + 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x03, 0x54, 0x54, 0x53, 0x12, 0x13, 0x2e, + 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, + 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4a, 0x0a, 0x0e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, + 0x65, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, - 0x22, 0x00, 0x12, 0x35, 0x0a, 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, - 0x15, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, + 0x1a, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, + 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, + 0x00, 0x12, 0x3b, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x2e, 0x62, 0x61, + 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, + 0x61, 0x67, 0x65, 0x1a, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x39, + 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, 0x12, 0x19, 0x2e, 0x62, 0x61, + 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, - 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, 0x72, 0x65, - 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, - 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, - 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, - 0x70, 0x6c, 0x79, 0x22, 0x00, 0x30, 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64, - 0x64, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, - 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, - 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, 0x65, 0x6e, - 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, 0x61, 0x63, - 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, - 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, - 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, 0x0a, 0x12, - 0x41, 0x75, 0x64, 0x69, 0x6f, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, - 0x6f, 0x6e, 0x12, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, - 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, - 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, - 0x69, 0x70, 
0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x03, 0x54, - 0x54, 0x53, 0x12, 0x13, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x54, 0x53, - 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, - 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4a, 0x0a, 0x0e, 0x54, 0x6f, - 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, - 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, - 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x3b, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, - 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, - 0x65, 0x22, 0x00, 0x12, 0x39, 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, - 0x12, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, - 0x73, 0x53, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, - 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3f, - 0x0a, 0x0c, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x12, 0x1c, - 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44, - 0x65, 0x6c, 0x65, 0x74, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, - 0x42, 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x12, 0x19, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, - 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, - 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, - 0x74, 0x22, 0x00, 0x12, 0x45, 0x0a, 0x0a, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, - 0x64, 0x12, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, - 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x19, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, - 0x6e, 0x64, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x42, 0x5a, 0x0a, 0x19, 0x69, 0x6f, - 0x2e, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, - 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, - 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, - 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, - 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3f, 0x0a, 0x0c, 0x53, 0x74, 0x6f, + 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 
0x65, 0x12, 0x1c, 0x2e, 0x62, 0x61, 0x63, 0x6b, + 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, + 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x42, 0x0a, 0x09, 0x53, 0x74, + 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x12, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, + 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, + 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x45, + 0x0a, 0x0a, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x12, 0x1a, 0x2e, 0x62, + 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, + 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, + 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x52, 0x65, 0x73, + 0x75, 0x6c, 0x74, 0x22, 0x00, 0x42, 0x5a, 0x0a, 0x19, 0x69, 0x6f, 0x2e, 0x73, 0x6b, 0x79, 0x6e, + 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, + 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x42, 0x61, 0x63, 0x6b, 0x65, + 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, + 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, 0x61, 0x6c, + 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -2470,7 +2551,7 @@ func file_backend_proto_rawDescGZIP() []byte { } var file_backend_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_backend_proto_msgTypes = make([]protoimpl.MessageInfo, 23) +var file_backend_proto_msgTypes = make([]protoimpl.MessageInfo, 24) var file_backend_proto_goTypes = []interface{}{ (StatusResponse_State)(0), // 0: backend.StatusResponse.State (*StoresKey)(nil), // 1: backend.StoresKey @@ -2495,7 +2576,8 @@ var file_backend_proto_goTypes = []interface{}{ (*TokenizationResponse)(nil), // 20: backend.TokenizationResponse (*MemoryUsageData)(nil), // 21: backend.MemoryUsageData (*StatusResponse)(nil), // 22: backend.StatusResponse - nil, // 23: backend.MemoryUsageData.BreakdownEntry + (*Message)(nil), // 23: backend.Message + nil, // 24: backend.MemoryUsageData.BreakdownEntry } var file_backend_proto_depIdxs = []int32{ 1, // 0: backend.StoresSetOptions.Keys:type_name -> backend.StoresKey @@ -2507,43 +2589,44 @@ var file_backend_proto_depIdxs = []int32{ 1, // 6: backend.StoresFindOptions.Key:type_name -> backend.StoresKey 1, // 7: backend.StoresFindResult.Keys:type_name -> backend.StoresKey 2, // 8: backend.StoresFindResult.Values:type_name -> backend.StoresValue - 17, // 9: backend.TranscriptResult.segments:type_name -> backend.TranscriptSegment - 23, // 10: backend.MemoryUsageData.breakdown:type_name -> backend.MemoryUsageData.BreakdownEntry - 0, // 11: backend.StatusResponse.state:type_name -> backend.StatusResponse.State - 21, // 12: backend.StatusResponse.memory:type_name -> backend.MemoryUsageData - 9, // 13: backend.Backend.Health:input_type -> backend.HealthMessage - 10, // 14: backend.Backend.Predict:input_type -> backend.PredictOptions - 12, // 15: backend.Backend.LoadModel:input_type -> 
backend.ModelOptions - 10, // 16: backend.Backend.PredictStream:input_type -> backend.PredictOptions - 10, // 17: backend.Backend.Embedding:input_type -> backend.PredictOptions - 18, // 18: backend.Backend.GenerateImage:input_type -> backend.GenerateImageRequest - 15, // 19: backend.Backend.AudioTranscription:input_type -> backend.TranscriptRequest - 19, // 20: backend.Backend.TTS:input_type -> backend.TTSRequest - 10, // 21: backend.Backend.TokenizeString:input_type -> backend.PredictOptions - 9, // 22: backend.Backend.Status:input_type -> backend.HealthMessage - 3, // 23: backend.Backend.StoresSet:input_type -> backend.StoresSetOptions - 4, // 24: backend.Backend.StoresDelete:input_type -> backend.StoresDeleteOptions - 5, // 25: backend.Backend.StoresGet:input_type -> backend.StoresGetOptions - 7, // 26: backend.Backend.StoresFind:input_type -> backend.StoresFindOptions - 11, // 27: backend.Backend.Health:output_type -> backend.Reply - 11, // 28: backend.Backend.Predict:output_type -> backend.Reply - 13, // 29: backend.Backend.LoadModel:output_type -> backend.Result - 11, // 30: backend.Backend.PredictStream:output_type -> backend.Reply - 14, // 31: backend.Backend.Embedding:output_type -> backend.EmbeddingResult - 13, // 32: backend.Backend.GenerateImage:output_type -> backend.Result - 16, // 33: backend.Backend.AudioTranscription:output_type -> backend.TranscriptResult - 13, // 34: backend.Backend.TTS:output_type -> backend.Result - 20, // 35: backend.Backend.TokenizeString:output_type -> backend.TokenizationResponse - 22, // 36: backend.Backend.Status:output_type -> backend.StatusResponse - 13, // 37: backend.Backend.StoresSet:output_type -> backend.Result - 13, // 38: backend.Backend.StoresDelete:output_type -> backend.Result - 6, // 39: backend.Backend.StoresGet:output_type -> backend.StoresGetResult - 8, // 40: backend.Backend.StoresFind:output_type -> backend.StoresFindResult - 27, // [27:41] is the sub-list for method output_type - 13, // [13:27] is the sub-list for method input_type - 13, // [13:13] is the sub-list for extension type_name - 13, // [13:13] is the sub-list for extension extendee - 0, // [0:13] is the sub-list for field type_name + 23, // 9: backend.PredictOptions.Messages:type_name -> backend.Message + 17, // 10: backend.TranscriptResult.segments:type_name -> backend.TranscriptSegment + 24, // 11: backend.MemoryUsageData.breakdown:type_name -> backend.MemoryUsageData.BreakdownEntry + 0, // 12: backend.StatusResponse.state:type_name -> backend.StatusResponse.State + 21, // 13: backend.StatusResponse.memory:type_name -> backend.MemoryUsageData + 9, // 14: backend.Backend.Health:input_type -> backend.HealthMessage + 10, // 15: backend.Backend.Predict:input_type -> backend.PredictOptions + 12, // 16: backend.Backend.LoadModel:input_type -> backend.ModelOptions + 10, // 17: backend.Backend.PredictStream:input_type -> backend.PredictOptions + 10, // 18: backend.Backend.Embedding:input_type -> backend.PredictOptions + 18, // 19: backend.Backend.GenerateImage:input_type -> backend.GenerateImageRequest + 15, // 20: backend.Backend.AudioTranscription:input_type -> backend.TranscriptRequest + 19, // 21: backend.Backend.TTS:input_type -> backend.TTSRequest + 10, // 22: backend.Backend.TokenizeString:input_type -> backend.PredictOptions + 9, // 23: backend.Backend.Status:input_type -> backend.HealthMessage + 3, // 24: backend.Backend.StoresSet:input_type -> backend.StoresSetOptions + 4, // 25: backend.Backend.StoresDelete:input_type -> backend.StoresDeleteOptions + 5, // 26: 
backend.Backend.StoresGet:input_type -> backend.StoresGetOptions + 7, // 27: backend.Backend.StoresFind:input_type -> backend.StoresFindOptions + 11, // 28: backend.Backend.Health:output_type -> backend.Reply + 11, // 29: backend.Backend.Predict:output_type -> backend.Reply + 13, // 30: backend.Backend.LoadModel:output_type -> backend.Result + 11, // 31: backend.Backend.PredictStream:output_type -> backend.Reply + 14, // 32: backend.Backend.Embedding:output_type -> backend.EmbeddingResult + 13, // 33: backend.Backend.GenerateImage:output_type -> backend.Result + 16, // 34: backend.Backend.AudioTranscription:output_type -> backend.TranscriptResult + 13, // 35: backend.Backend.TTS:output_type -> backend.Result + 20, // 36: backend.Backend.TokenizeString:output_type -> backend.TokenizationResponse + 22, // 37: backend.Backend.Status:output_type -> backend.StatusResponse + 13, // 38: backend.Backend.StoresSet:output_type -> backend.Result + 13, // 39: backend.Backend.StoresDelete:output_type -> backend.Result + 6, // 40: backend.Backend.StoresGet:output_type -> backend.StoresGetResult + 8, // 41: backend.Backend.StoresFind:output_type -> backend.StoresFindResult + 28, // [28:42] is the sub-list for method output_type + 14, // [14:28] is the sub-list for method input_type + 14, // [14:14] is the sub-list for extension type_name + 14, // [14:14] is the sub-list for extension extendee + 0, // [0:14] is the sub-list for field type_name } func init() { file_backend_proto_init() } @@ -2816,6 +2899,18 @@ func file_backend_proto_init() { return nil } } + file_backend_proto_msgTypes[22].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Message); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } } type x struct{} out := protoimpl.TypeBuilder{ @@ -2823,7 +2918,7 @@ func file_backend_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_backend_proto_rawDesc, NumEnums: 1, - NumMessages: 23, + NumMessages: 24, NumExtensions: 0, NumServices: 1, }, diff --git a/pkg/grpc/proto/backend_grpc.pb.go b/pkg/grpc/proto/backend_grpc.pb.go index 0314cd4e..a1f442e0 100644 --- a/pkg/grpc/proto/backend_grpc.pb.go +++ b/pkg/grpc/proto/backend_grpc.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. 
// versions: // - protoc-gen-go-grpc v1.3.0 -// - protoc v4.23.4 +// - protoc v5.26.1 // source: backend.proto package proto From 099bd54ff21311a17f9a33f80d83da6aaa0bc524 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 11 Apr 2024 19:22:30 +0200 Subject: [PATCH 0276/2895] ci: try to build on macos14 (#2011) * ci: try to build on macos14 Signed-off-by: Ettore Di Giacinto * ci: fixup artifact name Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- .github/workflows/release.yaml | 44 ++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index a69a2b05..3c1cea44 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -158,3 +158,47 @@ jobs: with: files: | release/* + + + build-macOS-arm64: + strategy: + matrix: + include: + - build: 'avx2' + defines: '' + - build: 'avx' + defines: '-DLLAMA_AVX2=OFF' + - build: 'avx512' + defines: '-DLLAMA_AVX512=ON' + runs-on: macos-14 + steps: + - name: Clone + uses: actions/checkout@v4 + with: + submodules: true + - uses: actions/setup-go@v5 + with: + go-version: '1.21.x' + cache: false + - name: Dependencies + run: | + brew install protobuf grpc + - name: Build + id: build + env: + CMAKE_ARGS: "${{ matrix.defines }}" + BUILD_ID: "${{ matrix.build }}" + run: | + export C_INCLUDE_PATH=/usr/local/include + export CPLUS_INCLUDE_PATH=/usr/local/include + make dist + - uses: actions/upload-artifact@v4 + with: + name: LocalAI-MacOS-arm64-${{ matrix.build }} + path: release/ + - name: Release + uses: softprops/action-gh-release@v1 + if: startsWith(github.ref, 'refs/tags/') + with: + files: | + release/* From 70c4f110a49fd2f5f0f216932171f3dd0ae0d443 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 11 Apr 2024 20:18:05 +0200 Subject: [PATCH 0277/2895] Update overview.md --- docs/content/docs/overview.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md index 3c3a397d..6aede1d6 100644 --- a/docs/content/docs/overview.md +++ b/docs/content/docs/overview.md @@ -67,9 +67,9 @@ Start the image with Docker to have a functional clone of OpenAI! 🚀: docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu # Do you have a Nvidia GPUs? Use this instead # CUDA 11 -# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-cuda-11 +# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-11 # CUDA 12 -# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-cuda-12 +# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-12 ``` See the [💻 Quickstart](https://localai.io/basics/getting_started/) for all the options and way you can run LocalAI! From da82ce81b5dd139932fb3a8e8fd565f36be79d38 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 18:57:33 +0000 Subject: [PATCH 0278/2895] build(deps): bump github.com/opencontainers/runc from 1.1.5 to 1.1.12 (#2000) Bumps [github.com/opencontainers/runc](https://github.com/opencontainers/runc) from 1.1.5 to 1.1.12. 
- [Release notes](https://github.com/opencontainers/runc/releases) - [Changelog](https://github.com/opencontainers/runc/blob/main/CHANGELOG.md) - [Commits](https://github.com/opencontainers/runc/compare/v1.1.5...v1.1.12) --- updated-dependencies: - dependency-name: github.com/opencontainers/runc dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 36 ++---------------------------------- 2 files changed, 3 insertions(+), 35 deletions(-) diff --git a/go.mod b/go.mod index 298f2d69..99af8ce7 100644 --- a/go.mod +++ b/go.mod @@ -109,7 +109,7 @@ require ( github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.0.2 // indirect - github.com/opencontainers/runc v1.1.5 // indirect + github.com/opencontainers/runc v1.1.12 // indirect github.com/pierrec/lz4/v4 v4.1.2 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pkoukk/tiktoken-go v0.1.2 // indirect diff --git a/go.sum b/go.sum index 551dd922..a421e79c 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,5 @@ github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 h1:w+iIsaOQNcT7OZ575w+acHgRric5iCyQh+xv+KJ4HB8= github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf h1:UgjXLcE9I+VaVz7uBIlzAnyZIXwiDlIiTWqCh159aUI= @@ -38,20 +37,14 @@ github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/charmbracelet/glamour v0.7.0 h1:2BtKGZ4iVJCDfMF229EzbeR1QRKLWztO9dMtjmqZSng= github.com/charmbracelet/glamour v0.7.0/go.mod h1:jUMh5MeihljJPQbJ/wf4ldw2+yBP59+ctV36jASy7ps= -github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= -github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA= -github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U= github.com/containerd/continuity v0.3.0 h1:nisirsYROK15TAMVukJOUyGJjz4BNQJBVsNvAXZJ/eg= github.com/containerd/continuity v0.3.0/go.mod h1:wJEAIwKOm/pBZuBd0JmeTvnLquTB1Ag8espWhkykbPM= -github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= -github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= 
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -70,7 +63,6 @@ github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df/go.mod h1:gWy7 github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY= github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s= github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= -github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= @@ -108,7 +100,6 @@ github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg78 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gofiber/fiber/v2 v2.52.4 h1:P+T+4iK7VaqUsq2PALYEfBBo6bJZ4q3FP8cZ84EggTM= github.com/gofiber/fiber/v2 v2.52.4/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ= github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc= @@ -137,7 +128,6 @@ github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEW github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= @@ -180,11 +170,8 @@ github.com/klauspost/compress v1.17.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQs github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/lib/pq v0.0.0-20180327071824-d34b9ff171c2 
h1:hRGSmZu7j271trc9sneMrpOW7GN5ngLm8YUZIPzf394= @@ -221,10 +208,8 @@ github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyua github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/IfikLNY= github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= -github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU= github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 h1:rzf0wL0CHVc8CEsgyygG0Mn9CNCCPZqOPaz8RiiHYQk= github.com/moby/term v0.0.0-20201216013528-df9cb8a40635/go.mod h1:FBS0z0QWA44HXygs7VXDUOGoN/1TV3RuWkLO04am3wc= -github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 h1:OFVkSxR7CRSRSNm5dvpMRZwmSwWa8EMMnHbc84fW5tU= github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig= github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGwqBpN8N7BrSKC+nmdfw+9nPQIDyjHHlaIiitZI= @@ -259,10 +244,8 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrBg0D7ufOcFM= github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= -github.com/opencontainers/runc v1.1.5 h1:L44KXEpKmfWDcS02aeGm8QNTFXTo2D+8MYGDIJ/GDEs= -github.com/opencontainers/runc v1.1.5/go.mod h1:1J5XiS+vdZ3wCyZybsuxXZWGrgSr8fFJHLXuG2PsnNg= -github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= +github.com/opencontainers/runc v1.1.12 h1:BOIssBaW1La0/qbNZHXOOa71dZfZEQOzW7dqQf3phss= +github.com/opencontainers/runc v1.1.12/go.mod h1:S+lQwSfncpBha7XTy/5lBwWgm5+y5Ma/O44Ekby9FK8= github.com/ory/dockertest/v3 v3.10.0 h1:4K3z2VMe8Woe++invjaTB7VRyQXQy5UY+loujO4aNE4= github.com/ory/dockertest/v3 v3.10.0/go.mod h1:nr57ZbRWMqfsdGdFNLHz5jjNdDb7VVFnzAeW1n5N1Lg= github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg= @@ -300,12 +283,10 @@ github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A= github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww= github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY= -github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sashabaranov/go-openai v1.20.4 h1:095xQ/fAtRa0+Rj21sezVJABgKfGPNbyx/sAN/hJUmg= github.com/sashabaranov/go-openai v1.20.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/schollz/progressbar/v3 v3.13.1 h1:o8rySDYiQ59Mwzy2FELeHY5ZARXZTVJC7iHD6PEFUiE= github.com/schollz/progressbar/v3 v3.13.1/go.mod h1:xvrbki8kfT1fzWzBT/UZd9L6GA+jdL7HAgq2RFnO6fQ= -github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= github.com/shirou/gopsutil/v3 v3.23.7/go.mod h1:c4gnmoRC0hQuaLqvxnx1//VXQ0Ms/X9UnJF8pddY5z4= github.com/shirou/gopsutil/v3 v3.23.9 
h1:ZI5bWVeu2ep4/DIxB4U9okeYJ7zp/QLTO4auRb/ty/E= github.com/shirou/gopsutil/v3 v3.23.9/go.mod h1:x/NWSb71eMcjFIO0vhyGW5nZ7oSIgVjrCnADckb85GA= @@ -315,7 +296,6 @@ github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU= github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k= github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ= github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= -github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng= @@ -337,7 +317,6 @@ github.com/swaggo/files/v2 v2.0.0 h1:hmAt8Dkynw7Ssz46F6pn8ok6YmGZqHSVLZ+HQM7i0kw github.com/swaggo/files/v2 v2.0.0/go.mod h1:24kk2Y9NYEJ5lHuCra6iVwkMjIekMCaFq/0JQj66kyM= github.com/swaggo/swag v1.16.3 h1:PnCYjPCah8FK4I26l2F/KQ4yz3sILcVUN3cTlBFA9Pg= github.com/swaggo/swag v1.16.3/go.mod h1:DImHIuOFXKpMFAQjcC7FG4m3Dg4+QuUgUzJmKjI/gRk= -github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI= github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU= github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= @@ -349,15 +328,12 @@ github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701/go.mod h1:SiwyRS7s github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/ulikunitz/xz v0.5.9 h1:RsKRIA2MO8x56wkkcd3LbtcE/uMszhb6DpRf+3uwa3I= github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= -github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.51.0 h1:8b30A5JlZ6C7AS81RsWjYMQmrZG6feChmgAolCl1SqA= github.com/valyala/fasthttp v1.51.0/go.mod h1:oI2XroL+lI7vdXyYoQk03bXBThfFl2cVdIA3Xl7cH8g= github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8= github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= -github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= -github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f h1:J9EGpcZtP0E/raorCMxlFGSTBrsSlaDGf3jU/qvAE2c= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0= @@ -407,7 +383,6 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod 
h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= @@ -425,12 +400,10 @@ golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -439,12 +412,8 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -494,7 +463,6 @@ google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzi google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= From b2785ff06e3eb7c1d62a6c3921ae706d58c054dd Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 12 Apr 2024 00:49:23 +0200 Subject: [PATCH 0279/2895] feat(gallery): support ConfigURLs (#2012) Signed-off-by: Ettore Di Giacinto --- core/http/api_test.go | 24 +++++++++++++++++++++ core/http/endpoints/localai/gallery.go | 4 +++- core/services/gallery.go | 5 +++++ docs/content/docs/features/model-gallery.md | 10 ++++++--- pkg/gallery/op.go | 1 + 5 files changed, 40 insertions(+), 4 deletions(-) diff --git a/core/http/api_test.go b/core/http/api_test.go index 804c15fe..1553ed21 100644 --- a/core/http/api_test.go +++ b/core/http/api_test.go @@ -43,6 +43,7 @@ Can you help rephrasing sentences? type modelApplyRequest struct { ID string `json:"id"` URL string `json:"url"` + ConfigURL string `json:"config_url"` Name string `json:"name"` Overrides map[string]interface{} `json:"overrides"` } @@ -366,6 +367,29 @@ var _ = Describe("API test", func() { Expect(err).ToNot(HaveOccurred()) Expect(content["backend"]).To(Equal("llama")) }) + It("apply models from config", func() { + response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ + ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml", + }) + + Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) + + uuid := response["uuid"].(string) + + Eventually(func() bool { + response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) + return response["processed"].(bool) + }, "360s", "10s").Should(Equal(true)) + + Eventually(func() []string { + models, _ := client.ListModels(context.TODO()) + modelList := []string{} + for _, m := range models.Models { + modelList = append(modelList, m.ID) + } + return modelList + }, "360s", "10s").Should(ContainElements("hermes-2-pro-mistral")) + }) It("apply models without overrides", func() { response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml", diff --git a/core/http/endpoints/localai/gallery.go b/core/http/endpoints/localai/gallery.go index 5c295a2a..b693e7c3 100644 --- a/core/http/endpoints/localai/gallery.go +++ b/core/http/endpoints/localai/gallery.go @@ -19,7 +19,8 @@ type ModelGalleryEndpointService struct { } type GalleryModel struct { - ID string `json:"id"` + ID string `json:"id"` + ConfigURL string `json:"config_url"` gallery.GalleryModel } @@ -64,6 +65,7 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe Id: uuid.String(), GalleryName: input.ID, Galleries: mgs.galleries, + ConfigURL: input.ConfigURL, } return c.JSON(struct { ID string `json:"uuid"` diff --git a/core/services/gallery.go b/core/services/gallery.go index 826f4573..b068abbb 100644 --- a/core/services/gallery.go +++ b/core/services/gallery.go @@ -9,6 +9,7 @@ import ( "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/pkg/gallery" + "github.com/go-skynet/LocalAI/pkg/startup" 
"github.com/go-skynet/LocalAI/pkg/utils" "gopkg.in/yaml.v2" ) @@ -90,6 +91,9 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader } else { err = gallery.InstallModelFromGalleryByName(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback) } + } else if op.ConfigURL != "" { + startup.PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL) + err = cl.Preload(g.modelPath) } else { err = prepareModel(g.modelPath, op.Req, cl, progressCallback) } @@ -129,6 +133,7 @@ func processRequests(modelPath, s string, cm *config.BackendConfigLoader, galler utils.ResetDownloadTimers() if r.ID == "" { err = prepareModel(modelPath, r.GalleryModel, cm, utils.DisplayDownloadFunction) + } else { if strings.Contains(r.ID, "@") { err = gallery.InstallModelFromGallery( diff --git a/docs/content/docs/features/model-gallery.md b/docs/content/docs/features/model-gallery.md index 0d978122..05d15ef4 100644 --- a/docs/content/docs/features/model-gallery.md +++ b/docs/content/docs/features/model-gallery.md @@ -146,12 +146,16 @@ In the body of the request you must specify the model configuration file URL (`u ```bash LOCALAI=http://localhost:8080 curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "url": "" + "config_url": "" }' # or if from a repository curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ "id": "@" }' +# or from a gallery config +curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ + "url": "" + }' ``` An example that installs openllama can be: @@ -159,8 +163,8 @@ An example that installs openllama can be: ```bash LOCALAI=http://localhost:8080 curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "url": "https://github.com/go-skynet/model-gallery/blob/main/openllama_3b.yaml" - }' + "config_url": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml" + }' ``` The API will return a job `uuid` that you can use to track the job progress: diff --git a/pkg/gallery/op.go b/pkg/gallery/op.go index 873c356d..99796812 100644 --- a/pkg/gallery/op.go +++ b/pkg/gallery/op.go @@ -5,6 +5,7 @@ type GalleryOp struct { Id string Galleries []Gallery GalleryName string + ConfigURL string } type GalleryOpStatus struct { From 677e20756b31ce158b207b246b0ae373f826897e Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 12 Apr 2024 00:49:41 +0200 Subject: [PATCH 0280/2895] :arrow_up: Update ggerganov/llama.cpp (#2014) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e2e4f211..e15166a8 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=8228b66dbc16290c5cbd70e80ab47c068e2569d8 +CPPLLAMA_VERSION?=a474f50ebb3e10be3371562f75f3f573f1a86b5f # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From e0dee52a2ab811fccc18f309a6c5fefcb4725448 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 00:53:43 -0400 Subject: [PATCH 0281/2895] build(deps): bump the pip group across 4 directories with 8 updates (#2017) * build(deps): bump the pip group across 4 directories with 8 updates Bumps the pip group with 
1 update in the /examples/functions directory: [langchain](https://github.com/langchain-ai/langchain). Bumps the pip group with 2 updates in the /examples/langchain-chroma directory: [langchain](https://github.com/langchain-ai/langchain) and [llama-index](https://github.com/run-llama/llama_index). Bumps the pip group with 6 updates in the /examples/langchain/langchainpy-localai-example directory:

| Package | From | To |
| --- | --- | --- |
| [langchain](https://github.com/langchain-ai/langchain) | `0.0.159` | `0.1.0` |
| [aiohttp](https://github.com/aio-libs/aiohttp) | `3.8.4` | `3.9.2` |
| [certifi](https://github.com/certifi/python-certifi) | `2022.12.7` | `2023.7.22` |
| [idna](https://github.com/kjd/idna) | `3.4` | `3.7` |
| [requests](https://github.com/psf/requests) | `2.29.0` | `2.31.0` |
| [urllib3](https://github.com/urllib3/urllib3) | `1.26.15` | `1.26.18` |

Bumps the pip group with 1 update in the /examples/streamlit-bot directory: [streamlit](https://github.com/streamlit/streamlit). Updates `langchain` from 0.0.234 to 0.1.0 - [Release notes](https://github.com/langchain-ai/langchain/releases) - [Commits](https://github.com/langchain-ai/langchain/compare/v0.0.234...v0.1.0) Updates `langchain` from 0.0.160 to 0.1.0 - [Release notes](https://github.com/langchain-ai/langchain/releases) - [Commits](https://github.com/langchain-ai/langchain/compare/v0.0.234...v0.1.0) Updates `llama-index` from 0.6.2 to 0.9.36 - [Release notes](https://github.com/run-llama/llama_index/releases) - [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md) - [Commits](https://github.com/run-llama/llama_index/compare/v0.6.2...v0.9.36) Updates `langchain` from 0.0.159 to 0.1.0 - [Release notes](https://github.com/langchain-ai/langchain/releases) - [Commits](https://github.com/langchain-ai/langchain/compare/v0.0.234...v0.1.0) Updates `aiohttp` from 3.8.4 to 3.9.2 - [Release notes](https://github.com/aio-libs/aiohttp/releases) - [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst) - [Commits](https://github.com/aio-libs/aiohttp/compare/v3.8.4...v3.9.2) Updates `certifi` from 2022.12.7 to 2023.7.22 - [Commits](https://github.com/certifi/python-certifi/compare/2022.12.07...2023.07.22) Updates `idna` from 3.4 to 3.7 - [Release notes](https://github.com/kjd/idna/releases) - [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst) - [Commits](https://github.com/kjd/idna/compare/v3.4...v3.7) Updates `requests` from 2.29.0 to 2.31.0 - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.29.0...v2.31.0) Updates `urllib3` from 1.26.15 to 1.26.18 - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/1.26.15...1.26.18) Updates `streamlit` from 1.26.0 to 1.30.0 - [Release notes](https://github.com/streamlit/streamlit/releases) - [Commits](https://github.com/streamlit/streamlit/compare/1.26.0...1.30.0) --- updated-dependencies: - dependency-name: langchain dependency-type: direct:production dependency-group: pip - dependency-name: langchain dependency-type: direct:production dependency-group: pip - dependency-name: llama-index dependency-type: direct:production dependency-group: pip - dependency-name: langchain dependency-type: direct:production dependency-group: pip - dependency-name: aiohttp dependency-type:
direct:production dependency-group: pip - dependency-name: certifi dependency-type: direct:production dependency-group: pip - dependency-name: idna dependency-type: direct:production dependency-group: pip - dependency-name: requests dependency-type: direct:production dependency-group: pip - dependency-name: urllib3 dependency-type: direct:production dependency-group: pip - dependency-name: streamlit dependency-type: direct:production dependency-group: pip ... Signed-off-by: dependabot[bot] * Update version.json The PR appears stuck on a check and needs an arbitrary commit to trigger the security check workflow with write permissions; bumping the docs version to match the latest release serves as that otherwise-inconsequential change. Signed-off-by: Dave --------- Signed-off-by: dependabot[bot] Signed-off-by: Dave Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Dave --- docs/data/version.json | 2 +- examples/functions/requirements.txt | 2 +- examples/langchain-chroma/requirements.txt | 4 ++-- .../langchainpy-localai-example/requirements.txt | 12 ++++++------ examples/streamlit-bot/requirements.txt | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/data/version.json b/docs/data/version.json index 1b6a2161..6a618115 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.12.3" + "version": "v2.12.4" } diff --git a/examples/functions/requirements.txt b/examples/functions/requirements.txt index 7164e011..759c5b03 100644 --- a/examples/functions/requirements.txt +++ b/examples/functions/requirements.txt @@ -1,2 +1,2 @@ -langchain==0.0.234 +langchain==0.1.0 openai==0.27.8 diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt index b9e649c5..cdf466b9 100644 --- a/examples/langchain-chroma/requirements.txt +++ b/examples/langchain-chroma/requirements.txt @@ -1,4 +1,4 @@ -langchain==0.0.160 +langchain==0.1.0 openai==0.27.6 chromadb==0.3.21 -llama-index==0.6.2 \ No newline at end of file +llama-index==0.9.36 \ No newline at end of file diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index 2de5bcf0..1e63b0bf 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -1,16 +1,16 @@ -aiohttp==3.8.4 +aiohttp==3.9.2 aiosignal==1.3.1 async-timeout==4.0.2 attrs==23.1.0 -certifi==2022.12.7 +certifi==2023.7.22 charset-normalizer==3.1.0 colorama==0.4.6 dataclasses-json==0.5.7 debugpy==1.6.7 frozenlist==1.3.3 greenlet==2.0.2 -idna==3.4 -langchain==0.0.159 +idna==3.7 +langchain==0.1.0 marshmallow==3.19.0 marshmallow-enum==1.5.1 multidict==6.0.4 @@ -22,11 +22,11 @@ openapi-schema-pydantic==1.2.4 packaging==23.1 pydantic==1.10.7 PyYAML==6.0 -requests==2.29.0 +requests==2.31.0 SQLAlchemy==2.0.12 tenacity==8.2.2 tqdm==4.65.0 typing-inspect==0.8.0 typing_extensions==4.5.0 -urllib3==1.26.15 +urllib3==1.26.18 yarl==1.9.2 diff --git a/examples/streamlit-bot/requirements.txt b/examples/streamlit-bot/requirements.txt index ae527c76..1fcd5093 100644 --- a/examples/streamlit-bot/requirements.txt +++ b/examples/streamlit-bot/requirements.txt @@ -1,2 +1,2 @@ -streamlit==1.26.0 +streamlit==1.30.0 requests \ No newline at end of file From 7e52c8e21ad3ee054444f90d5b16fd49e3f411b9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 12 Apr 2024 15:27:40 +0200 Subject: [PATCH 0283/2895] Update CONTRIBUTING.md
Signed-off-by: Ettore Di Giacinto --- CONTRIBUTING.md | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0e237ea7..593ad0ed 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,4 +1,4 @@ -# Contributing to localAI +# Contributing to LocalAI Thank you for your interest in contributing to LocalAI! We appreciate your time and effort in helping to improve our project. Before you get started, please take a moment to review these guidelines. @@ -29,8 +29,9 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time 1. Clone the repository: `git clone https://github.com/go-skynet/LocalAI.git` 2. Navigate to the project directory: `cd LocalAI` -3. Install the required dependencies: `make prepare` -4. Run LocalAI: `make run` +3. Install the required dependencies (see https://localai.io/basics/build/#build-localai-locally) +4. Build LocalAI: `make build` +5. Run LocalAI: `./local-ai` ## Contributing @@ -59,14 +60,29 @@ If you find a bug, have a feature request, or encounter any issues, please check `make test` cannot handle all the models yet. Please be sure to add a test case for any new feature or changed code. +### Running AIO tests + +All-In-One images have a set of tests that automatically verify that most of the endpoints work correctly; a typical flow can be: + +```bash +# Build the LocalAI docker image +make DOCKER_IMAGE=local-ai docker + +# Build the corresponding AIO image +BASE_IMAGE=local-ai DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio + +# Run the AIO e2e tests +LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio make run-e2e-aio +``` + ## Documentation -- We are welcome the contribution of the documents, please open new PR in the official document repo [localai-website](https://github.com/go-skynet/localai-website) - +We welcome contributions to the documentation; please open a new PR or create a new issue. The documentation is available under `docs/` https://github.com/mudler/LocalAI/tree/master/docs + ## Community and Communication - You can reach out via the Github issue tracker. - Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions) - Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy) ---- \ No newline at end of file +--- From fb105837bac4b1468db5464ab572bb3ec7e61389 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 12 Apr 2024 15:37:56 +0200 Subject: [PATCH 0284/2895] Update secscan.yaml Signed-off-by: Ettore Di Giacinto --- .github/workflows/secscan.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/secscan.yaml b/.github/workflows/secscan.yaml index 14958070..884b84d5 100644 --- a/.github/workflows/secscan.yaml +++ b/.github/workflows/secscan.yaml @@ -15,13 +15,16 @@ jobs: steps: - name: Checkout Source uses: actions/checkout@v4 + if: ${{ github.actor != 'dependabot[bot]' }} - name: Run Gosec Security Scanner + if: ${{ github.actor != 'dependabot[bot]' }} uses: securego/gosec@master with: # we let the report content trigger a failure using the GitHub Security features. args: '-no-fail -fmt sarif -out results.sarif ./...'
- name: Upload SARIF file + if: ${{ github.actor != 'dependabot[bot]' }} uses: github/codeql-action/upload-sarif@v2 with: # Path to SARIF file relative to the root of the repository - sarif_file: results.sarif \ No newline at end of file + sarif_file: results.sarif From 18eea9088a866eab14cd3859af13c96653f89c3a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 12 Apr 2024 15:38:34 +0200 Subject: [PATCH 0285/2895] Update dependabot_auto.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/dependabot_auto.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml index 12541d05..22c709e3 100644 --- a/.github/workflows/dependabot_auto.yml +++ b/.github/workflows/dependabot_auto.yml @@ -1,7 +1,6 @@ name: Dependabot auto-merge on: - pull_request_target: - types: [review_requested] +- pull_request_target permissions: contents: write From 69d638268b67afed91b15ae5b124255569589a47 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 12 Apr 2024 15:57:13 +0200 Subject: [PATCH 0286/2895] Update dependabot_auto.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/dependabot_auto.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml index 22c709e3..f9d03a30 100644 --- a/.github/workflows/dependabot_auto.yml +++ b/.github/workflows/dependabot_auto.yml @@ -40,4 +40,4 @@ jobs: run: gh pr merge --auto --merge "$PR_URL" env: PR_URL: ${{github.event.pull_request.html_url}} - GITHUB_TOKEN: ${{secrets.RELEASE_TOKEN}} + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} From 0e549424e782e315ee166efdb1cba77a1a4a750b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 12 Apr 2024 15:59:25 +0200 Subject: [PATCH 0287/2895] Update dependabot_auto.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/dependabot_auto.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml index f9d03a30..51337d20 100644 --- a/.github/workflows/dependabot_auto.yml +++ b/.github/workflows/dependabot_auto.yml @@ -37,7 +37,7 @@ jobs: - name: Enable auto-merge for Dependabot PRs if: ${{ contains(github.event.pull_request.title, 'bump')}} - run: gh pr merge --auto --merge "$PR_URL" + run: gh pr merge --auto --squash "$PR_URL" env: PR_URL: ${{github.event.pull_request.html_url}} GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} From fcb63aed8a969a2419ed593d8facdccf3ab88e5f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 15:36:46 +0000 Subject: [PATCH 0288/2895] build(deps): bump follow-redirects from 1.15.2 to 1.15.6 in /examples/langchain/langchainjs-localai-example (#2020) build(deps): bump follow-redirects Bumps [follow-redirects](https://github.com/follow-redirects/follow-redirects) from 1.15.2 to 1.15.6. - [Release notes](https://github.com/follow-redirects/follow-redirects/releases) - [Commits](https://github.com/follow-redirects/follow-redirects/compare/v1.15.2...v1.15.6) --- updated-dependencies: - dependency-name: follow-redirects dependency-type: indirect ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .../langchainjs-localai-example/package-lock.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/langchain/langchainjs-localai-example/package-lock.json b/examples/langchain/langchainjs-localai-example/package-lock.json index 29e6999f..e0a45539 100644 --- a/examples/langchain/langchainjs-localai-example/package-lock.json +++ b/examples/langchain/langchainjs-localai-example/package-lock.json @@ -369,9 +369,9 @@ } }, "node_modules/follow-redirects": { - "version": "1.15.2", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz", - "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==", + "version": "1.15.6", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", + "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==", "funding": [ { "type": "individual", @@ -1479,9 +1479,9 @@ "integrity": "sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==" }, "follow-redirects": { - "version": "1.15.2", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz", - "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==" + "version": "1.15.6", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", + "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==" }, "form-data": { "version": "4.0.0", From 912d2dccfa63a3a8e6720dda73e30cf8f7d6b944 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 13 Apr 2024 09:13:00 +0200 Subject: [PATCH 0289/2895] :arrow_up: Update ggerganov/llama.cpp (#2024) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e15166a8..0f6d8fd2 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=a474f50ebb3e10be3371562f75f3f573f1a86b5f +CPPLLAMA_VERSION?=ab9a3240a9da941fdef5cd4a25f2b97c2f5a67aa # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From a8ebf6f575c502684e9f5118cc99622546f73438 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Sat, 13 Apr 2024 02:14:32 -0500 Subject: [PATCH 0290/2895] fix: respect concurrency from parent build parameters when building GRPC (#2023) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- backend/cpp/grpc/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/cpp/grpc/Makefile b/backend/cpp/grpc/Makefile index 6a181794..5308693b 100644 --- a/backend/cpp/grpc/Makefile +++ b/backend/cpp/grpc/Makefile @@ -5,7 +5,6 @@ SYSTEM ?= $(HOST_SYSTEM) TAG_LIB_GRPC?=v1.59.0 GIT_REPO_LIB_GRPC?=https://github.com/grpc/grpc.git GIT_CLONE_DEPTH?=1 -NUM_BUILD_THREADS?=$(shell nproc --ignore=1) INSTALLED_PACKAGES=installed_packages GRPC_REPO=grpc_repo @@ -52,7 +51,7 @@ $(GRPC_REPO): $(GRPC_BUILD): $(GRPC_REPO) mkdir -p $(GRPC_BUILD) - cd 
$(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . -- -j ${NUM_BUILD_THREADS} && cmake --build . --target install -- -j ${NUM_BUILD_THREADS} + cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . && cmake --build . --target install build: $(INSTALLED_PACKAGES) From 1981154f49437adcbcb9956611aee4809b406947 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Sat, 13 Apr 2024 02:37:32 -0500 Subject: [PATCH 0291/2895] fix: dont commit generated files to git (#1993) * fix: initial work towards not committing generated files to the repository Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: improve build docs Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: remove unused folder from .dockerignore and .gitignore Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: attempt to fix extra backend tests Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: attempt to fix other tests Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: more test fixes Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: fix apple tests Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: more extras tests fixes Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: add GOBIN to PATH in docker build Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: extra tests and Dockerfile corrections Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: remove build dependency checks Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: add golang protobuf compilers to tests-linux action Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: ensure protogen is run for extra backend installs Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: use newer protobuf Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: more missing protoc binaries Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: missing dependencies during docker build Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: don't install grpc compilers in the final stage if they aren't needed Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: python-grpc-tools in 22.04 repos is too old Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: add a couple of extra build dependencies to Makefile Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: unbreak container rebuild functionality Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --------- Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- .dockerignore | 2 + .github/workflows/test-extra.yml | 29 +- .github/workflows/test.yml | 25 +- .gitignore | 5 + Dockerfile | 33 +- Makefile | 144 +- backend/backend_grpc.pb.go | 457 --- backend/python/autogptq/Makefile | 11 +- backend/python/autogptq/backend_pb2.py | 79 - backend/python/autogptq/backend_pb2_grpc.py | 495 --- backend/python/bark/Makefile | 16 +- backend/python/bark/backend_pb2.py | 79 - backend/python/bark/backend_pb2_grpc.py | 495 --- backend/python/coqui/Makefile | 16 +- backend/python/coqui/backend_pb2.py | 79 - backend/python/coqui/backend_pb2_grpc.py | 495 --- backend/python/diffusers/Makefile | 16 +- 
backend/python/diffusers/backend_pb2.py | 79 - backend/python/diffusers/backend_pb2_grpc.py | 495 --- backend/python/exllama/Makefile | 14 +- backend/python/exllama/backend_pb2.py | 79 - backend/python/exllama/backend_pb2_grpc.py | 495 --- backend/python/exllama2/Makefile | 14 +- backend/python/exllama2/backend_pb2.py | 79 - backend/python/exllama2/backend_pb2_grpc.py | 495 --- backend/python/mamba/Makefile | 18 +- backend/python/mamba/backend_pb2.py | 79 - backend/python/mamba/backend_pb2_grpc.py | 495 --- backend/python/petals/Makefile | 16 +- backend/python/petals/backend_pb2.py | 79 - backend/python/petals/backend_pb2_grpc.py | 495 --- backend/python/sentencetransformers/Makefile | 16 +- .../sentencetransformers/backend_pb2.py | 79 - .../sentencetransformers/backend_pb2_grpc.py | 495 --- backend/python/transformers-musicgen/Makefile | 17 +- .../transformers-musicgen/backend_pb2.py | 79 - .../transformers-musicgen/backend_pb2_grpc.py | 495 --- backend/python/transformers/Makefile | 16 +- backend/python/transformers/backend_pb2.py | 79 - .../python/transformers/backend_pb2_grpc.py | 495 --- backend/python/vall-e-x/Makefile | 16 +- backend/python/vall-e-x/backend_pb2.py | 79 - backend/python/vall-e-x/backend_pb2_grpc.py | 495 --- backend/python/vllm/Makefile | 18 +- backend/python/vllm/backend_pb2.py | 79 - backend/python/vllm/backend_pb2_grpc.py | 495 --- docs/content/docs/getting-started/build.md | 18 +- pkg/grpc/proto/backend.pb.go | 2934 ----------------- pkg/grpc/proto/backend_grpc.pb.go | 618 ---- 49 files changed, 381 insertions(+), 11550 deletions(-) delete mode 100644 backend/backend_grpc.pb.go delete mode 100644 backend/python/autogptq/backend_pb2.py delete mode 100644 backend/python/autogptq/backend_pb2_grpc.py delete mode 100644 backend/python/bark/backend_pb2.py delete mode 100644 backend/python/bark/backend_pb2_grpc.py delete mode 100644 backend/python/coqui/backend_pb2.py delete mode 100644 backend/python/coqui/backend_pb2_grpc.py delete mode 100644 backend/python/diffusers/backend_pb2.py delete mode 100644 backend/python/diffusers/backend_pb2_grpc.py delete mode 100644 backend/python/exllama/backend_pb2.py delete mode 100644 backend/python/exllama/backend_pb2_grpc.py delete mode 100644 backend/python/exllama2/backend_pb2.py delete mode 100644 backend/python/exllama2/backend_pb2_grpc.py delete mode 100644 backend/python/mamba/backend_pb2.py delete mode 100644 backend/python/mamba/backend_pb2_grpc.py delete mode 100644 backend/python/petals/backend_pb2.py delete mode 100644 backend/python/petals/backend_pb2_grpc.py delete mode 100644 backend/python/sentencetransformers/backend_pb2.py delete mode 100644 backend/python/sentencetransformers/backend_pb2_grpc.py delete mode 100644 backend/python/transformers-musicgen/backend_pb2.py delete mode 100644 backend/python/transformers-musicgen/backend_pb2_grpc.py delete mode 100644 backend/python/transformers/backend_pb2.py delete mode 100644 backend/python/transformers/backend_pb2_grpc.py delete mode 100644 backend/python/vall-e-x/backend_pb2.py delete mode 100644 backend/python/vall-e-x/backend_pb2_grpc.py delete mode 100644 backend/python/vllm/backend_pb2.py delete mode 100644 backend/python/vllm/backend_pb2_grpc.py delete mode 100644 pkg/grpc/proto/backend.pb.go delete mode 100644 pkg/grpc/proto/backend_grpc.pb.go diff --git a/.dockerignore b/.dockerignore index 97e8aa34..2c394c48 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,4 +1,6 @@ .idea +.github +.vscode models examples/chatbot-ui/models examples/rwkv/models diff --git 
a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index 7689f06d..7705783e 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -32,8 +32,9 @@ jobs: sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ sudo apt-get update && \ sudo apt-get install -y conda - sudo apt-get install -y ca-certificates cmake curl patch + sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev + pip install --user grpcio-tools sudo rm -rfv /usr/bin/conda || true @@ -61,8 +62,9 @@ jobs: sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ sudo apt-get update && \ sudo apt-get install -y conda - sudo apt-get install -y ca-certificates cmake curl patch + sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev + pip install --user grpcio-tools sudo rm -rfv /usr/bin/conda || true @@ -90,8 +92,9 @@ jobs: sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ sudo apt-get update && \ sudo apt-get install -y conda - sudo apt-get install -y ca-certificates cmake curl patch + sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev + pip install --user grpcio-tools sudo rm -rfv /usr/bin/conda || true @@ -120,8 +123,9 @@ jobs: sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ sudo apt-get update && \ sudo apt-get install -y conda - sudo apt-get install -y ca-certificates cmake curl patch + sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev + pip install --user grpcio-tools sudo rm -rfv /usr/bin/conda || true @@ -151,8 +155,9 @@ jobs: # sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ # sudo apt-get update && \ # sudo apt-get install -y conda - # sudo apt-get install -y ca-certificates cmake curl patch + # sudo apt-get install -y ca-certificates cmake curl patch python3-pip # sudo apt-get install -y libopencv-dev + # pip install --user grpcio-tools # sudo rm -rfv /usr/bin/conda || true @@ -222,8 +227,9 @@ jobs: # sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ # sudo apt-get update && \ # sudo apt-get install -y conda - # sudo apt-get install -y ca-certificates cmake curl patch + # sudo apt-get install -y ca-certificates cmake curl patch python3-pip # sudo apt-get install -y libopencv-dev + # pip install --user grpcio-tools # sudo rm -rfv /usr/bin/conda || true @@ -254,8 +260,9 @@ jobs: # sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] 
https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ # sudo apt-get update && \ # sudo apt-get install -y conda - # sudo apt-get install -y ca-certificates cmake curl patch + # sudo apt-get install -y ca-certificates cmake curl patch python3-pip # sudo apt-get install -y libopencv-dev + # pip install --user grpcio-tools # sudo rm -rfv /usr/bin/conda || true # - name: Test vllm # run: | @@ -280,8 +287,9 @@ jobs: sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ sudo apt-get update && \ sudo apt-get install -y conda - sudo apt-get install -y ca-certificates cmake curl patch - sudo apt-get install -y libopencv-dev + sudo apt-get install -y ca-certificates cmake curl patch python3-pip + sudo apt-get install -y libopencv-dev + pip install --user grpcio-tools sudo rm -rfv /usr/bin/conda || true - name: Test vall-e-x run: | @@ -307,7 +315,8 @@ jobs: sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ sudo apt-get update && \ sudo apt-get install -y conda - sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng + sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip + pip install --user grpcio-tools sudo rm -rfv /usr/bin/conda || true - name: Test coqui diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 02093b3f..46c4e065 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -70,17 +70,27 @@ jobs: - name: Dependencies run: | sudo apt-get update - sudo apt-get install build-essential ffmpeg + sudo apt-get install build-essential curl ffmpeg curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ - gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ + gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \ sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ sudo apt-get update && \ sudo apt-get install -y conda - sudo apt-get install -y ca-certificates cmake curl patch + sudo apt-get install -y ca-certificates cmake patch python3-pip unzip sudo apt-get install -y libopencv-dev - + + curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \ + unzip -j -d /usr/local/bin protoc.zip bin/protoc && \ + rm protoc.zip + + go install google.golang.org/protobuf/cmd/protoc-gen-go@latest + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest + + # The python3-grpc-tools package in 22.04 is too old + pip install --user grpcio-tools + sudo rm -rfv /usr/bin/conda || true PATH=$PATH:/opt/conda/bin make -C 
backend/python/sentencetransformers @@ -89,7 +99,7 @@ jobs: GO_TAGS="tts" make -C sources/go-piper piper.o && \ sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \ # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn) - GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build + PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build - name: Cache grpc id: cache-grpc uses: actions/cache@v4 @@ -108,7 +118,7 @@ jobs: cd grpc && cd cmake/build && sudo make --jobs 5 install - name: Test run: | - GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test + PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} uses: mxschmitt/action-tmate@v3 @@ -186,7 +196,8 @@ jobs: run: go version - name: Dependencies run: | - brew install protobuf grpc make + brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc + pip install --user grpcio-tools - name: Test run: | export C_INCLUDE_PATH=/usr/local/include diff --git a/.gitignore b/.gitignore index b48f7391..f1f860e9 100644 --- a/.gitignore +++ b/.gitignore @@ -39,3 +39,8 @@ backend-assets/* !backend-assets/.keep prepare /ggml-metal.metal + +# Protobuf generated files +*.pb.go +*pb2.py +*pb2_grpc.py diff --git a/Dockerfile b/Dockerfile index 5fb6230c..d0217d50 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,12 +20,25 @@ ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface ARG GO_TAGS="stablediffusion tinydream tts" RUN apt-get update && \ - apt-get install -y ca-certificates curl patch pip cmake git && apt-get clean + apt-get install -y ca-certificates curl python3-pip unzip && apt-get clean # Install Go RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -C /usr/local -xz ENV PATH $PATH:/usr/local/go/bin +# Install grpc compilers +ENV PATH $PATH:/root/go/bin +RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \ + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest + +# Install protobuf (the version in 22.04 is too old) +RUN curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \ + unzip -j -d /usr/local/bin protoc.zip bin/protoc && \ + rm protoc.zip + +# Install grpcio-tools (the version in 22.04 is too old) +RUN pip install --user grpcio-tools + COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/ RUN update-ca-certificates @@ -68,7 +81,8 @@ RUN test -n "$TARGETARCH" \ FROM requirements-core as requirements-extras -RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ +RUN apt install -y gpg && \ + curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \ @@ -100,7 +114,7 @@ ENV MAKEFLAGS=${MAKEFLAGS} WORKDIR /build RUN apt-get update && \ - apt-get install -y g++ cmake git && \ + apt-get install -y 
build-essential cmake git && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -133,6 +147,12 @@ WORKDIR /build COPY . . COPY .git . RUN echo "GO_TAGS: $GO_TAGS" + +RUN apt-get update && \ + apt-get install -y build-essential cmake git && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + RUN make prepare # If we are building with clblas support, we need the libraries for the builds @@ -191,6 +211,11 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \ apt-get clean \ ; fi +RUN apt-get update && \ + apt-get install -y cmake git && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + WORKDIR /build # we start fresh & re-copy all assets because `make build` does not clean up nicely after itself @@ -202,7 +227,7 @@ COPY . . COPY --from=builder /build/sources ./sources/ COPY --from=grpc /build/grpc ./grpc/ -RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf grpc +RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf /build/grpc # Copy the binary COPY --from=builder /build/local-ai ./ diff --git a/Makefile b/Makefile index 0f6d8fd2..5932dfb2 100644 --- a/Makefile +++ b/Makefile @@ -289,10 +289,12 @@ clean: ## Remove build related file rm -rf ./sources rm -rf $(BINARY_NAME) rm -rf release/ - rm -rf backend-assets + rm -rf backend-assets/* $(MAKE) -C backend/cpp/grpc clean $(MAKE) -C backend/cpp/llama clean $(MAKE) dropreplace + $(MAKE) protogen-clean + rmdir pkg/grpc/proto || true clean-tests: rm -rf test-models @@ -416,30 +418,136 @@ help: ## Show this help. else if (/^## .*$$/) {printf " ${CYAN}%s${RESET}\n", substr($$1,4)} \ }' $(MAKEFILE_LIST) +.PHONY: protogen protogen: protogen-go protogen-python +.PHONY: protogen-clean +protogen-clean: protogen-go-clean protogen-python-clean + +.PHONY: protogen-go protogen-go: + mkdir -p pkg/grpc/proto protoc -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \ backend/backend.proto -protogen-python: - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/sentencetransformers/ --grpc_python_out=backend/python/sentencetransformers/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/transformers/ --grpc_python_out=backend/python/transformers/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/transformers-musicgen/ --grpc_python_out=backend/python/transformers-musicgen/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/autogptq/ --grpc_python_out=backend/python/autogptq/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama/ --grpc_python_out=backend/python/exllama/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/bark/ --grpc_python_out=backend/python/bark/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/diffusers/ --grpc_python_out=backend/python/diffusers/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/coqui/ --grpc_python_out=backend/python/coqui/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vall-e-x/ --grpc_python_out=backend/python/vall-e-x/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vllm/ --grpc_python_out=backend/python/vllm/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ 
--python_out=backend/python/petals/ --grpc_python_out=backend/python/petals/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/mamba/ --grpc_python_out=backend/python/mamba/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama2/ --grpc_python_out=backend/python/exllama2/ backend/backend.proto +.PHONY: protogen-go-clean +protogen-go-clean: + $(RM) pkg/grpc/proto/backend.pb.go pkg/grpc/proto/backend_grpc.pb.go + $(RM) bin/* + +.PHONY: protogen-python +protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen sentencetransformers-protogen transformers-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen + +.PHONY: protogen-python-clean +protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean + +.PHONY: autogptq-protogen +autogptq-protogen: + $(MAKE) -C backend/python/autogptq protogen + +.PHONY: autogptq-protogen-clean +autogptq-protogen-clean: + $(MAKE) -C backend/python/autogptq protogen-clean + +.PHONY: bark-protogen +bark-protogen: + $(MAKE) -C backend/python/bark protogen + +.PHONY: bark-protogen-clean +bark-protogen-clean: + $(MAKE) -C backend/python/bark protogen-clean + +.PHONY: coqui-protogen +coqui-protogen: + $(MAKE) -C backend/python/coqui protogen + +.PHONY: coqui-protogen-clean +coqui-protogen-clean: + $(MAKE) -C backend/python/coqui protogen-clean + +.PHONY: diffusers-protogen +diffusers-protogen: + $(MAKE) -C backend/python/diffusers protogen + +.PHONY: diffusers-protogen-clean +diffusers-protogen-clean: + $(MAKE) -C backend/python/diffusers protogen-clean + +.PHONY: exllama-protogen +exllama-protogen: + $(MAKE) -C backend/python/exllama protogen + +.PHONY: exllama-protogen-clean +exllama-protogen-clean: + $(MAKE) -C backend/python/exllama protogen-clean + +.PHONY: exllama2-protogen +exllama2-protogen: + $(MAKE) -C backend/python/exllama2 protogen + +.PHONY: exllama2-protogen-clean +exllama2-protogen-clean: + $(MAKE) -C backend/python/exllama2 protogen-clean + +.PHONY: mamba-protogen +mamba-protogen: + $(MAKE) -C backend/python/mamba protogen + +.PHONY: mamba-protogen-clean +mamba-protogen-clean: + $(MAKE) -C backend/python/mamba protogen-clean + +.PHONY: petals-protogen +petals-protogen: + $(MAKE) -C backend/python/petals protogen + +.PHONY: petals-protogen-clean +petals-protogen-clean: + $(MAKE) -C backend/python/petals protogen-clean + +.PHONY: sentencetransformers-protogen +sentencetransformers-protogen: + $(MAKE) -C backend/python/sentencetransformers protogen + +.PHONY: sentencetransformers-protogen-clean +sentencetransformers-protogen-clean: + $(MAKE) -C backend/python/sentencetransformers protogen-clean + +.PHONY: transformers-protogen +transformers-protogen: + $(MAKE) -C backend/python/transformers protogen + +.PHONY: transformers-protogen-clean +transformers-protogen-clean: + $(MAKE) -C backend/python/transformers protogen-clean + +.PHONY: transformers-musicgen-protogen +transformers-musicgen-protogen: + $(MAKE) -C backend/python/transformers-musicgen protogen + +.PHONY: transformers-musicgen-protogen-clean +transformers-musicgen-protogen-clean: + $(MAKE) -C 
backend/python/transformers-musicgen protogen-clean + +.PHONY: vall-e-x-protogen +vall-e-x-protogen: + $(MAKE) -C backend/python/vall-e-x protogen + +.PHONY: vall-e-x-protogen-clean +vall-e-x-protogen-clean: + $(MAKE) -C backend/python/vall-e-x protogen-clean + +.PHONY: vllm-protogen +vllm-protogen: + $(MAKE) -C backend/python/vllm protogen + +.PHONY: vllm-protogen-clean +vllm-protogen-clean: + $(MAKE) -C backend/python/vllm protogen-clean ## GRPC # Note: it is duplicated in the Dockerfile -prepare-extra-conda-environments: +prepare-extra-conda-environments: protogen-python $(MAKE) -C backend/python/autogptq $(MAKE) -C backend/python/bark $(MAKE) -C backend/python/coqui @@ -454,7 +562,7 @@ prepare-extra-conda-environments: $(MAKE) -C backend/python/petals $(MAKE) -C backend/python/exllama2 -prepare-test-extra: +prepare-test-extra: protogen-python $(MAKE) -C backend/python/transformers $(MAKE) -C backend/python/diffusers @@ -478,7 +586,7 @@ backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/ @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true -backend-assets/grpc: replace +backend-assets/grpc: protogen-go replace mkdir -p backend-assets/grpc backend-assets/grpc/bert-embeddings: sources/go-bert sources/go-bert/libgobert.a backend-assets/grpc diff --git a/backend/backend_grpc.pb.go b/backend/backend_grpc.pb.go deleted file mode 100644 index 5c97691d..00000000 --- a/backend/backend_grpc.pb.go +++ /dev/null @@ -1,457 +0,0 @@ -// Code generated by protoc-gen-go-grpc. DO NOT EDIT. -// versions: -// - protoc-gen-go-grpc v1.2.0 -// - protoc v4.23.4 -// source: backend/backend.proto - -package proto - -import ( - context "context" - grpc "google.golang.org/grpc" - codes "google.golang.org/grpc/codes" - status "google.golang.org/grpc/status" -) - -// This is a compile-time assertion to ensure that this generated file -// is compatible with the grpc package it is being compiled against. -// Requires gRPC-Go v1.32.0 or later. -const _ = grpc.SupportPackageIsVersion7 - -// BackendClient is the client API for Backend service. -// -// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. 
-type BackendClient interface { - Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) - Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) - LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) - PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) - Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) - GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) - AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) - TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) - TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) - Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) -} - -type backendClient struct { - cc grpc.ClientConnInterface -} - -func NewBackendClient(cc grpc.ClientConnInterface) BackendClient { - return &backendClient{cc} -} - -func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) { - out := new(Reply) - err := c.cc.Invoke(ctx, "/backend.Backend/Health", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) { - out := new(Reply) - err := c.cc.Invoke(ctx, "/backend.Backend/Predict", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) { - out := new(Result) - err := c.cc.Invoke(ctx, "/backend.Backend/LoadModel", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) { - stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], "/backend.Backend/PredictStream", opts...) - if err != nil { - return nil, err - } - x := &backendPredictStreamClient{stream} - if err := x.ClientStream.SendMsg(in); err != nil { - return nil, err - } - if err := x.ClientStream.CloseSend(); err != nil { - return nil, err - } - return x, nil -} - -type Backend_PredictStreamClient interface { - Recv() (*Reply, error) - grpc.ClientStream -} - -type backendPredictStreamClient struct { - grpc.ClientStream -} - -func (x *backendPredictStreamClient) Recv() (*Reply, error) { - m := new(Reply) - if err := x.ClientStream.RecvMsg(m); err != nil { - return nil, err - } - return m, nil -} - -func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) { - out := new(EmbeddingResult) - err := c.cc.Invoke(ctx, "/backend.Backend/Embedding", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) { - out := new(Result) - err := c.cc.Invoke(ctx, "/backend.Backend/GenerateImage", in, out, opts...) 
- if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) { - out := new(TranscriptResult) - err := c.cc.Invoke(ctx, "/backend.Backend/AudioTranscription", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) { - out := new(Result) - err := c.cc.Invoke(ctx, "/backend.Backend/TTS", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) { - out := new(TokenizationResponse) - err := c.cc.Invoke(ctx, "/backend.Backend/TokenizeString", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) { - out := new(StatusResponse) - err := c.cc.Invoke(ctx, "/backend.Backend/Status", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -// BackendServer is the server API for Backend service. -// All implementations must embed UnimplementedBackendServer -// for forward compatibility -type BackendServer interface { - Health(context.Context, *HealthMessage) (*Reply, error) - Predict(context.Context, *PredictOptions) (*Reply, error) - LoadModel(context.Context, *ModelOptions) (*Result, error) - PredictStream(*PredictOptions, Backend_PredictStreamServer) error - Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) - GenerateImage(context.Context, *GenerateImageRequest) (*Result, error) - AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error) - TTS(context.Context, *TTSRequest) (*Result, error) - TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error) - Status(context.Context, *HealthMessage) (*StatusResponse, error) - mustEmbedUnimplementedBackendServer() -} - -// UnimplementedBackendServer must be embedded to have forward compatible implementations. 
-type UnimplementedBackendServer struct { -} - -func (UnimplementedBackendServer) Health(context.Context, *HealthMessage) (*Reply, error) { - return nil, status.Errorf(codes.Unimplemented, "method Health not implemented") -} -func (UnimplementedBackendServer) Predict(context.Context, *PredictOptions) (*Reply, error) { - return nil, status.Errorf(codes.Unimplemented, "method Predict not implemented") -} -func (UnimplementedBackendServer) LoadModel(context.Context, *ModelOptions) (*Result, error) { - return nil, status.Errorf(codes.Unimplemented, "method LoadModel not implemented") -} -func (UnimplementedBackendServer) PredictStream(*PredictOptions, Backend_PredictStreamServer) error { - return status.Errorf(codes.Unimplemented, "method PredictStream not implemented") -} -func (UnimplementedBackendServer) Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) { - return nil, status.Errorf(codes.Unimplemented, "method Embedding not implemented") -} -func (UnimplementedBackendServer) GenerateImage(context.Context, *GenerateImageRequest) (*Result, error) { - return nil, status.Errorf(codes.Unimplemented, "method GenerateImage not implemented") -} -func (UnimplementedBackendServer) AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error) { - return nil, status.Errorf(codes.Unimplemented, "method AudioTranscription not implemented") -} -func (UnimplementedBackendServer) TTS(context.Context, *TTSRequest) (*Result, error) { - return nil, status.Errorf(codes.Unimplemented, "method TTS not implemented") -} -func (UnimplementedBackendServer) TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error) { - return nil, status.Errorf(codes.Unimplemented, "method TokenizeString not implemented") -} -func (UnimplementedBackendServer) Status(context.Context, *HealthMessage) (*StatusResponse, error) { - return nil, status.Errorf(codes.Unimplemented, "method Status not implemented") -} -func (UnimplementedBackendServer) mustEmbedUnimplementedBackendServer() {} - -// UnsafeBackendServer may be embedded to opt out of forward compatibility for this service. -// Use of this interface is not recommended, as added methods to BackendServer will -// result in compilation errors. 
-type UnsafeBackendServer interface { - mustEmbedUnimplementedBackendServer() -} - -func RegisterBackendServer(s grpc.ServiceRegistrar, srv BackendServer) { - s.RegisterService(&Backend_ServiceDesc, srv) -} - -func _Backend_Health_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(HealthMessage) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).Health(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/backend.Backend/Health", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).Health(ctx, req.(*HealthMessage)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_Predict_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(PredictOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).Predict(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/backend.Backend/Predict", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).Predict(ctx, req.(*PredictOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(ModelOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).LoadModel(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/backend.Backend/LoadModel", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).LoadModel(ctx, req.(*ModelOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_PredictStream_Handler(srv interface{}, stream grpc.ServerStream) error { - m := new(PredictOptions) - if err := stream.RecvMsg(m); err != nil { - return err - } - return srv.(BackendServer).PredictStream(m, &backendPredictStreamServer{stream}) -} - -type Backend_PredictStreamServer interface { - Send(*Reply) error - grpc.ServerStream -} - -type backendPredictStreamServer struct { - grpc.ServerStream -} - -func (x *backendPredictStreamServer) Send(m *Reply) error { - return x.ServerStream.SendMsg(m) -} - -func _Backend_Embedding_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(PredictOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).Embedding(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/backend.Backend/Embedding", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).Embedding(ctx, req.(*PredictOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_GenerateImage_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(GenerateImageRequest) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).GenerateImage(ctx, in) - } - info := 
&grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/backend.Backend/GenerateImage", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).GenerateImage(ctx, req.(*GenerateImageRequest)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_AudioTranscription_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(TranscriptRequest) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).AudioTranscription(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/backend.Backend/AudioTranscription", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).AudioTranscription(ctx, req.(*TranscriptRequest)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_TTS_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(TTSRequest) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).TTS(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/backend.Backend/TTS", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).TTS(ctx, req.(*TTSRequest)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_TokenizeString_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(PredictOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).TokenizeString(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/backend.Backend/TokenizeString", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).TokenizeString(ctx, req.(*PredictOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_Status_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(HealthMessage) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).Status(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/backend.Backend/Status", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).Status(ctx, req.(*HealthMessage)) - } - return interceptor(ctx, in, info, handler) -} - -// Backend_ServiceDesc is the grpc.ServiceDesc for Backend service. 
-// It's only intended for direct use with grpc.RegisterService, -// and not to be introspected or modified (even as a copy) -var Backend_ServiceDesc = grpc.ServiceDesc{ - ServiceName: "backend.Backend", - HandlerType: (*BackendServer)(nil), - Methods: []grpc.MethodDesc{ - { - MethodName: "Health", - Handler: _Backend_Health_Handler, - }, - { - MethodName: "Predict", - Handler: _Backend_Predict_Handler, - }, - { - MethodName: "LoadModel", - Handler: _Backend_LoadModel_Handler, - }, - { - MethodName: "Embedding", - Handler: _Backend_Embedding_Handler, - }, - { - MethodName: "GenerateImage", - Handler: _Backend_GenerateImage_Handler, - }, - { - MethodName: "AudioTranscription", - Handler: _Backend_AudioTranscription_Handler, - }, - { - MethodName: "TTS", - Handler: _Backend_TTS_Handler, - }, - { - MethodName: "TokenizeString", - Handler: _Backend_TokenizeString_Handler, - }, - { - MethodName: "Status", - Handler: _Backend_Status_Handler, - }, - }, - Streams: []grpc.StreamDesc{ - { - StreamName: "PredictStream", - Handler: _Backend_PredictStream_Handler, - ServerStreams: true, - }, - }, - Metadata: "backend/backend.proto", -} diff --git a/backend/python/autogptq/Makefile b/backend/python/autogptq/Makefile index dfae12c1..eb81f045 100644 --- a/backend/python/autogptq/Makefile +++ b/backend/python/autogptq/Makefile @@ -1,4 +1,13 @@ .PHONY: autogptq -autogptq: +autogptq: protogen $(MAKE) -C ../common-env/transformers +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/autogptq/backend_pb2.py b/backend/python/autogptq/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/autogptq/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/autogptq/backend_pb2_grpc.py b/backend/python/autogptq/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/autogptq/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler 
plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
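[Editor's note, not part of the patch: the deletions above remove the committed backend_pb2.py / backend_pb2_grpc.py stubs; from this patch on, each Python backend regenerates them at build time via its `protogen` target (python3 -m grpc_tools.protoc, as in the Makefile rules added here). As a minimal sketch of what consumes those stubs — the servicer class, port, and thread count below are arbitrary illustrations, not values from this patch — a backend serves the Backend service declared in backend/backend.proto roughly like this:

    # Minimal sketch: serving the Backend service from freshly generated stubs.
    # backend_pb2.py / backend_pb2_grpc.py are no longer committed, so
    # `make protogen` must run first for these imports to resolve.
    from concurrent import futures
    import grpc
    import backend_pb2
    import backend_pb2_grpc

    class HealthOnlyServicer(backend_pb2_grpc.BackendServicer):
        def Health(self, request, context):
            # Reply.message is declared as bytes in backend.proto
            return backend_pb2.Reply(message=b"OK")

    def serve(address="localhost:50051"):  # address chosen for illustration
        server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
        backend_pb2_grpc.add_BackendServicer_to_server(HealthOnlyServicer(), server)
        server.add_insecure_port(address)
        server.start()
        server.wait_for_termination()

The real backends wire up the full servicer in their run.sh entry points; the point here is only that importing backend_pb2* now depends on the protogen step instead of files checked into the tree.]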
diff --git a/backend/python/bark/Makefile b/backend/python/bark/Makefile index 68f73b29..a16308f7 100644 --- a/backend/python/bark/Makefile +++ b/backend/python/bark/Makefile @@ -1,15 +1,25 @@ .PHONY: ttsbark -ttsbark: +ttsbark: protogen $(MAKE) -C ../common-env/transformers .PHONY: run -run: +run: protogen @echo "Running bark..." bash run.sh @echo "bark run." .PHONY: test -test: +test: protogen @echo "Testing bark..." bash test.sh @echo "bark tested." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/bark/backend_pb2.py b/backend/python/bark/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/bark/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a 
\x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/bark/backend_pb2_grpc.py b/backend/python/bark/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/bark/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. 
DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
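The bark changes above (and the coqui and diffusers changes that follow) all apply one pattern: the committed generated stubs backend_pb2.py and backend_pb2_grpc.py are deleted from the tree, and every Makefile target now depends on a new protogen rule that regenerates them from backend.proto with grpc_tools at build time. As a minimal usage sketch, not part of this patch, the regenerated stubs are consumed like any gRPC Python client; the health_check name, the working directory, and the localhost:50051 address are assumptions for illustration:

    # Sketch only, not from the patch: assumes `make protogen` has produced
    # backend_pb2.py / backend_pb2_grpc.py next to this script, and that a
    # backend gRPC server is listening on the assumed address below.
    import grpc
    import backend_pb2
    import backend_pb2_grpc

    def health_check(address="localhost:50051"):
        # Open a plaintext channel and call the Health RPC declared in backend.proto.
        with grpc.insecure_channel(address) as channel:
            stub = backend_pb2_grpc.BackendStub(channel)
            reply = stub.Health(backend_pb2.HealthMessage())
            return reply.message  # Reply.message is bytes in this schema

    if __name__ == "__main__":
        print(health_check())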
diff --git a/backend/python/coqui/Makefile b/backend/python/coqui/Makefile index e0ec9001..475804c9 100644 --- a/backend/python/coqui/Makefile +++ b/backend/python/coqui/Makefile @@ -1,15 +1,25 @@ .PHONY: coqui -coqui: +coqui: protogen $(MAKE) -C ../common-env/transformers .PHONY: run -run: +run: protogen @echo "Running coqui..." bash run.sh @echo "coqui run." .PHONY: test -test: +test: protogen @echo "Testing coqui..." bash test.sh @echo "coqui tested." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/coqui/backend_pb2.py b/backend/python/coqui/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/coqui/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a 
\x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/coqui/backend_pb2_grpc.py b/backend/python/coqui/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/coqui/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. 
DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
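The protogen rule added to each Makefile shells out to python3 -m grpc_tools.protoc. Where make is unavailable, the same step can be driven from Python; this sketch mirrors the rule's flags under the patch's assumed layout (backend.proto resolved through -I../..) and is likewise not part of the patch:

    # Sketch only: programmatic equivalent of the Makefile protogen rule.
    from grpc_tools import protoc

    ret = protoc.main([
        "protoc",               # argv[0] placeholder, ignored by protoc.main
        "-I../..",              # include path that contains backend.proto
        "--python_out=.",       # writes backend_pb2.py here
        "--grpc_python_out=.",  # writes backend_pb2_grpc.py here
        "backend.proto",
    ])
    if ret != 0:
        raise SystemExit("protoc failed with exit code %d" % ret)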
diff --git a/backend/python/diffusers/Makefile b/backend/python/diffusers/Makefile index 40e1d1a7..c73efdd2 100644 --- a/backend/python/diffusers/Makefile +++ b/backend/python/diffusers/Makefile @@ -12,15 +12,25 @@ export SKIP_CONDA=1 endif .PHONY: diffusers -diffusers: +diffusers: protogen @echo "Installing $(CONDA_ENV_PATH)..." bash install.sh $(CONDA_ENV_PATH) .PHONY: run -run: +run: protogen @echo "Running diffusers..." bash run.sh @echo "Diffusers run." -test: +test: protogen bash test.sh + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/diffusers/backend_pb2.py b/backend/python/diffusers/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/diffusers/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 
\x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/diffusers/backend_pb2_grpc.py b/backend/python/diffusers/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/diffusers/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler 
plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
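Each backend Makefile in this series gains the same protogen rule: backend_pb2.py and backend_pb2_grpc.py are no longer checked into the tree but are produced on demand by grpc_tools.protoc, with -I../.. pointing at the directory that holds backend.proto. The same generation step can also be driven from Python instead of make; a minimal sketch, assuming grpcio-tools is installed and the script runs from a backend directory two levels below backend.proto:

from grpc_tools import protoc

# Mirrors: python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
ret = protoc.main([
    "grpc_tools.protoc",    # argv[0] placeholder expected by protoc.main
    "-I../..",              # search path containing backend.proto
    "--python_out=.",       # writes backend_pb2.py into the current directory
    "--grpc_python_out=.",  # writes backend_pb2_grpc.py into the current directory
    "backend.proto",
])
if ret != 0:
    raise RuntimeError("protoc failed with exit code %d" % ret)
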
diff --git a/backend/python/exllama/Makefile b/backend/python/exllama/Makefile index b51adf76..15623448 100644 --- a/backend/python/exllama/Makefile +++ b/backend/python/exllama/Makefile @@ -1,11 +1,21 @@ export CONDA_ENV_PATH = "exllama.yml" .PHONY: exllama -exllama: +exllama: protogen bash install.sh ${CONDA_ENV_PATH} .PHONY: run -run: +run: protogen @echo "Running exllama..." bash run.sh @echo "exllama run." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/exllama/backend_pb2.py b/backend/python/exllama/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/exllama/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b 
\x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/exllama/backend_pb2_grpc.py b/backend/python/exllama/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/exllama/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. 
DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
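On the server side, the generated BackendServicer is a base class whose methods all abort with StatusCode.UNIMPLEMENTED; a concrete backend subclasses it, overrides the RPCs it supports, and registers itself with add_BackendServicer_to_server. A minimal sketch of that wiring, assuming the regenerated stubs are on the import path (the class name and port are assumptions for illustration):

from concurrent import futures

import grpc
import backend_pb2
import backend_pb2_grpc

class HealthOnlyBackend(backend_pb2_grpc.BackendServicer):
    # Override only what this backend supports; every other RPC keeps
    # the generated UNIMPLEMENTED behaviour.
    def Health(self, request, context):
        return backend_pb2.Reply(message=b"OK")

server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
backend_pb2_grpc.add_BackendServicer_to_server(HealthOnlyBackend(), server)
server.add_insecure_port("[::]:50051")  # assumed port
server.start()
server.wait_for_termination()
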
diff --git a/backend/python/exllama2/Makefile b/backend/python/exllama2/Makefile index 24158151..6d6776b7 100644 --- a/backend/python/exllama2/Makefile +++ b/backend/python/exllama2/Makefile @@ -1,10 +1,20 @@ .PHONY: exllama2 -exllama2: +exllama2: protogen $(MAKE) -C ../common-env/transformers bash install.sh .PHONY: run -run: +run: protogen @echo "Running exllama2..." bash run.sh @echo "exllama2 run." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/exllama2/backend_pb2.py b/backend/python/exllama2/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/exllama2/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b 
\x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/exllama2/backend_pb2_grpc.py b/backend/python/exllama2/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/exllama2/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler 
plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
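With the exllama2 Makefile above now owning generation, the deleted pair backend_pb2.py / backend_pb2_grpc.py is produced on demand by the protogen rule and removed again by protogen-clean. The same generation step can also be driven programmatically; a sketch that mirrors the Makefile recipe, assuming grpcio-tools is installed and the current directory is the backend folder (e.g. backend/python/exllama2):

from grpc_tools import protoc

# Mirrors the Makefile rule:
#   python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
rc = protoc.main([
    "grpc_tools.protoc",    # argv[0], ignored by protoc itself
    "-I../..",              # proto search path, as in the Makefile rule
    "--python_out=.",       # emits backend_pb2.py
    "--grpc_python_out=.",  # emits backend_pb2_grpc.py
    "backend.proto",
])
assert rc == 0, "protoc failed"
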
diff --git a/backend/python/mamba/Makefile b/backend/python/mamba/Makefile index 3ff00346..ca18e609 100644 --- a/backend/python/mamba/Makefile +++ b/backend/python/mamba/Makefile @@ -1,16 +1,26 @@ .PHONY: mamba -mamba: +mamba: protogen $(MAKE) -C ../common-env/transformers bash install.sh .PHONY: run -run: +run: protogen @echo "Running mamba..." bash run.sh @echo "mamba run." .PHONY: test -test: +test: protogen @echo "Testing mamba..." bash test.sh - @echo "mamba tested." \ No newline at end of file + @echo "mamba tested." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/mamba/backend_pb2.py b/backend/python/mamba/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/mamba/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 
\x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/mamba/backend_pb2_grpc.py b/backend/python/mamba/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/mamba/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. 
DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
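
The diffs that follow repeat one pattern per Python backend: the committed, generator-produced backend_pb2.py and backend_pb2_grpc.py are deleted, and the backend's Makefile gains a protogen target (plus a protogen-clean counterpart) that regenerates both files from backend.proto via grpc_tools.protoc. The deletion is safe because the files are a pure function of backend.proto. As a minimal sketch of what each protogen rule does, the same regeneration can also be driven from Python; the Makefile rule in the diffs is the authoritative form, and grpc_tools ships in the grpcio-tools pip package:

    from grpc_tools import protoc  # provided by the grpcio-tools package

    # Equivalent to the Makefile rule:
    #   python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
    protoc.main([
        "protoc",               # argv[0] placeholder; ignored by protoc.main
        "-I../..",              # proto search path, as in the Makefile
        "--python_out=.",       # regenerates backend_pb2.py
        "--grpc_python_out=.",  # regenerates backend_pb2_grpc.py
        "backend.proto",
    ])
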
diff --git a/backend/python/petals/Makefile b/backend/python/petals/Makefile index aa7778e1..0ed64a07 100644 --- a/backend/python/petals/Makefile +++ b/backend/python/petals/Makefile @@ -1,17 +1,27 @@ .PHONY: petals -petals: +petals: protogen @echo "Creating virtual environment..." bash install.sh "petals.yml" @echo "Virtual environment created." .PHONY: run -run: +run: protogen @echo "Running petals..." bash run.sh @echo "petals run." .PHONY: test -test: +test: protogen @echo "Testing petals..." bash test.sh @echo "petals tested." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/petals/backend_pb2.py b/backend/python/petals/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/petals/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 
\x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/petals/backend_pb2_grpc.py b/backend/python/petals/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/petals/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. 
DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
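
The backend_pb2_grpc.py deleted above is pure generated plumbing: a BackendStub for clients, a BackendServicer base class whose methods all answer UNIMPLEMENTED until overridden, and add_BackendServicer_to_server for registration. The regenerated file exposes exactly the same classes, so callers are unaffected. A minimal client sketch, assuming the regenerated stubs are on the import path and a backend is listening on localhost:50051 (the address is illustrative, not a LocalAI default):

    import grpc
    import backend_pb2
    import backend_pb2_grpc

    with grpc.insecure_channel("localhost:50051") as channel:
        stub = backend_pb2_grpc.BackendStub(channel)
        # Health takes a HealthMessage and returns a Reply; Reply.message is bytes.
        reply = stub.Health(backend_pb2.HealthMessage())
        print(reply.message)
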
diff --git a/backend/python/sentencetransformers/Makefile b/backend/python/sentencetransformers/Makefile index 7dbde5cf..ac442897 100644 --- a/backend/python/sentencetransformers/Makefile +++ b/backend/python/sentencetransformers/Makefile @@ -1,17 +1,27 @@ .PHONY: sentencetransformers -sentencetransformers: +sentencetransformers: protogen $(MAKE) -C ../common-env/transformers .PHONY: run -run: +run: protogen @echo "Running sentencetransformers..." bash run.sh @echo "sentencetransformers run." # It does not work well from the command line; it only works with an IDE like VSCode. .PHONY: test -test: +test: protogen @echo "Testing sentencetransformers..." bash test.sh @echo "sentencetransformers tested." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/sentencetransformers/backend_pb2.py b/backend/python/sentencetransformers/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/sentencetransformers/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12
\x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/sentencetransformers/backend_pb2_grpc.py b/backend/python/sentencetransformers/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/sentencetransformers/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by 
the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method 
not implemented!') - raise NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - 
rpc_method_handlers = { - 'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
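
The hunks above and below all follow the same pattern: the checked-in, protoc-generated backend_pb2.py and backend_pb2_grpc.py modules are deleted, and each backend Makefile gains a protogen rule that regenerates them from backend.proto at build time with python3 -m grpc_tools.protoc. Regenerating at build time keeps the stubs in sync with backend.proto and with the installed grpcio-tools version. As a quick smoke test that the regenerated modules import and wire up correctly, a client can exercise them roughly as follows (a minimal sketch; the address localhost:50051 is illustrative, not a LocalAI default):

    # smoke_test_client.py -- assumes backend_pb2*.py were produced by the protogen rule
    import grpc

    import backend_pb2
    import backend_pb2_grpc


    def check_health(address: str = "localhost:50051") -> bytes:
        # Open an insecure channel and issue the unary Health RPC.
        with grpc.insecure_channel(address) as channel:
            stub = backend_pb2_grpc.BackendStub(channel)
            reply = stub.Health(backend_pb2.HealthMessage())
            return reply.message  # Reply.message is declared as bytes in backend.proto


    if __name__ == "__main__":
        print(check_health())
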
diff --git a/backend/python/transformers-musicgen/Makefile b/backend/python/transformers-musicgen/Makefile index a2969d84..e28a356d 100644 --- a/backend/python/transformers-musicgen/Makefile +++ b/backend/python/transformers-musicgen/Makefile @@ -1,16 +1,25 @@ - .PHONY: transformers-musicgen -transformers-musicgen: +transformers-musicgen: protogen $(MAKE) -C ../common-env/transformers .PHONY: run -run: +run: protogen @echo "Running transformers..." bash run.sh @echo "transformers run." .PHONY: test -test: +test: protogen @echo "Testing transformers..." bash test.sh @echo "transformers tested." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/transformers-musicgen/backend_pb2.py b/backend/python/transformers-musicgen/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/transformers-musicgen/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 
\x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/transformers-musicgen/backend_pb2_grpc.py b/backend/python/transformers-musicgen/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/transformers-musicgen/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by 
the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method 
not implemented!') - raise NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - 
rpc_method_handlers = { - 'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
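
The generated BackendServicer base class answers every RPC with UNIMPLEMENTED, so a concrete backend only overrides the methods it actually supports and registers itself with add_BackendServicer_to_server. A minimal sketch of that wiring (the HealthOnlyServicer name and port 50051 are illustrative, not part of this patch):

    # server_sketch.py -- assumes backend_pb2*.py were produced by the protogen rule
    from concurrent import futures

    import grpc

    import backend_pb2
    import backend_pb2_grpc


    class HealthOnlyServicer(backend_pb2_grpc.BackendServicer):
        # Override Health only; every other RPC keeps the generated
        # UNIMPLEMENTED behaviour inherited from the base class.
        def Health(self, request, context):
            return backend_pb2.Reply(message=b"OK")


    def serve(port: int = 50051) -> None:
        server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
        backend_pb2_grpc.add_BackendServicer_to_server(HealthOnlyServicer(), server)
        server.add_insecure_port(f"[::]:{port}")
        server.start()
        server.wait_for_termination()


    if __name__ == "__main__":
        serve()
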
diff --git a/backend/python/transformers/Makefile b/backend/python/transformers/Makefile index 4eeb9ad5..afe48405 100644 --- a/backend/python/transformers/Makefile +++ b/backend/python/transformers/Makefile @@ -1,16 +1,26 @@ .PHONY: transformers -transformers: +transformers: protogen $(MAKE) -C ../common-env/transformers .PHONY: run -run: +run: protogen @echo "Running transformers..." bash run.sh @echo "transformers run." # It does not work well from the command line. It only works with an IDE like VSCode. .PHONY: test -test: +test: protogen @echo "Testing transformers..." bash test.sh @echo "transformers tested." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/transformers/backend_pb2.py b/backend/python/transformers/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/transformers/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14
\x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/transformers/backend_pb2_grpc.py b/backend/python/transformers/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/transformers/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol 
compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - 
raise NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers 
= { - 'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
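Note: the per-backend copies of backend_pb2_grpc.py deleted in this patch all follow the standard grpcio codegen template: a BackendStub client, a BackendServicer base class whose methods answer UNIMPLEMENTED until overridden, and add_BackendServicer_to_server for registration. As a minimal, illustrative sketch of how a concrete backend builds on these generated classes (the class name, reply text, address, and executor size below are assumptions for illustration, not part of this patch):

    from concurrent import futures

    import grpc

    import backend_pb2
    import backend_pb2_grpc


    class HealthyBackend(backend_pb2_grpc.BackendServicer):
        # Only Health is overridden here; every other RPC inherits the
        # generated stub and answers with StatusCode.UNIMPLEMENTED.
        def Health(self, request, context):
            # Reply.message is declared as bytes in backend.proto.
            return backend_pb2.Reply(message=b"OK")


    def serve(address: str = "localhost:50051") -> None:
        # Single-worker server; address and pool size are illustrative.
        server = grpc.server(futures.ThreadPoolExecutor(max_workers=1))
        backend_pb2_grpc.add_BackendServicer_to_server(HealthyBackend(), server)
        server.add_insecure_port(address)
        server.start()
        server.wait_for_termination()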
diff --git a/backend/python/vall-e-x/Makefile b/backend/python/vall-e-x/Makefile index 8f34f559..d7a80e55 100644 --- a/backend/python/vall-e-x/Makefile +++ b/backend/python/vall-e-x/Makefile @@ -3,18 +3,28 @@ export SKIP_CONDA=1 endif .PHONY: ttsvalle -ttsvalle: +ttsvalle: protogen $(MAKE) -C ../common-env/transformers bash install.sh .PHONY: run -run: +run: protogen @echo "Running ttsvalle..." bash run.sh @echo "ttsvalle run." .PHONY: test -test: +test: protogen @echo "Testing valle..." bash test.sh @echo "valle tested." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/vall-e-x/backend_pb2.py b/backend/python/vall-e-x/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/vall-e-x/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 
\x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/vall-e-x/backend_pb2_grpc.py b/backend/python/vall-e-x/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/vall-e-x/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler 
plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
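Note: with the checked-in stubs deleted, backend_pb2.py and backend_pb2_grpc.py are regenerated at build time by the new protogen target shown in the Makefile hunks (python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto), and run/test now depend on protogen so the modules always exist and always match backend.proto rather than drifting as committed copies. A minimal client-side sketch of exercising the regenerated stubs, assuming a backend is already listening (the address is an assumption; the service and message names come from backend.proto as shown above):

    import grpc

    import backend_pb2
    import backend_pb2_grpc


    def check_health(address: str = "localhost:50051") -> None:
        # Open an insecure channel to a running LocalAI Python backend.
        with grpc.insecure_channel(address) as channel:
            stub = backend_pb2_grpc.BackendStub(channel)
            # Health is a unary-unary RPC: HealthMessage in, Reply out.
            reply = stub.Health(backend_pb2.HealthMessage())
            print(reply.message)  # bytes payload, e.g. b"OK"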
diff --git a/backend/python/vllm/Makefile b/backend/python/vllm/Makefile index 9ee5886d..3e1fdd77 100644 --- a/backend/python/vllm/Makefile +++ b/backend/python/vllm/Makefile @@ -1,15 +1,25 @@ .PHONY: vllm -vllm: +vllm: protogen $(MAKE) -C ../common-env/transformers .PHONY: run -run: +run: protogen @echo "Running vllm..." bash run.sh @echo "vllm run." .PHONY: test -test: +test: protogen @echo "Testing vllm..." bash test.sh - @echo "vllm tested." \ No newline at end of file + @echo "vllm tested." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/vllm/backend_pb2.py b/backend/python/vllm/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/vllm/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 
\x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/vllm/backend_pb2_grpc.py b/backend/python/vllm/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/vllm/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. 
DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
diff --git a/docs/content/docs/getting-started/build.md b/docs/content/docs/getting-started/build.md
index 8ceaf1f5..a4db135e 100644
--- a/docs/content/docs/getting-started/build.md
+++ b/docs/content/docs/getting-started/build.md
@@ -36,14 +36,29 @@ To install the dependencies follow the instructions below:
 Install `xcode` from the App Store
 
 ```bash
-brew install abseil cmake go grpc protobuf wget
+brew install abseil cmake go grpc protobuf protoc-gen-go protoc-gen-go-grpc python wget
+```
+
+After installing the dependencies above, install `grpcio-tools` from PyPI, either with `pip install --user` or inside a virtualenv:
+
+```bash
+pip install --user grpcio-tools
 ```
 
 {{% /tab %}}
 {{% tab tabName="Debian" %}}
 
 ```bash
-apt install golang protobuf-compiler-grpc libgrpc-dev make cmake
+apt install cmake golang libgrpc-dev make protobuf-compiler-grpc python3-grpc-tools
+```
+
+Once Go is installed and working, install the binaries required to compile the Go protobuf components with the following commands:
+
+```bash
+go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
+go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
 ```
+
+`go install` places these binaries in `$(go env GOPATH)/bin` (typically `~/go/bin`); make sure that directory is in your `PATH` so that `protoc` can find them.
 
 {{% /tab %}}
diff --git a/pkg/grpc/proto/backend.pb.go b/pkg/grpc/proto/backend.pb.go
deleted file mode 100644
index e9afe196..00000000
--- a/pkg/grpc/proto/backend.pb.go
+++ /dev/null
@@ -1,2934 +0,0 @@
-// Code generated by protoc-gen-go. DO NOT EDIT.
-// versions:
-// 	protoc-gen-go v1.26.0
-// 	protoc        v5.26.1
-// source: backend.proto
-
-package proto
-
-import (
-	protoreflect "google.golang.org/protobuf/reflect/protoreflect"
-	protoimpl "google.golang.org/protobuf/runtime/protoimpl"
-	reflect "reflect"
-	sync "sync"
-)
-
-const (
-	// Verify that this generated code is sufficiently up-to-date.
-	_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
-	// Verify that runtime/protoimpl is sufficiently up-to-date.
-	_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
-)
-
-type StatusResponse_State int32
-
-const (
-	StatusResponse_UNINITIALIZED StatusResponse_State = 0
-	StatusResponse_BUSY          StatusResponse_State = 1
-	StatusResponse_READY         StatusResponse_State = 2
-	StatusResponse_ERROR         StatusResponse_State = -1
-)
-
-// Enum value maps for StatusResponse_State.
-var (
-	StatusResponse_State_name = map[int32]string{
-		0:  "UNINITIALIZED",
-		1:  "BUSY",
-		2:  "READY",
-		-1: "ERROR",
-	}
-	StatusResponse_State_value = map[string]int32{
-		"UNINITIALIZED": 0,
-		"BUSY":          1,
-		"READY":         2,
-		"ERROR":         -1,
-	}
-)
-
-func (x StatusResponse_State) Enum() *StatusResponse_State {
-	p := new(StatusResponse_State)
-	*p = x
-	return p
-}
-
-func (x StatusResponse_State) String() string {
-	return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x))
-}
-
-func (StatusResponse_State) Descriptor() protoreflect.EnumDescriptor {
-	return file_backend_proto_enumTypes[0].Descriptor()
-}
-
-func (StatusResponse_State) Type() protoreflect.EnumType {
-	return &file_backend_proto_enumTypes[0]
-}
-
-func (x StatusResponse_State) Number() protoreflect.EnumNumber {
-	return protoreflect.EnumNumber(x)
-}
-
-// Deprecated: Use StatusResponse_State.Descriptor instead.
-func (StatusResponse_State) EnumDescriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{21, 0} -} - -type StoresKey struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Floats []float32 `protobuf:"fixed32,1,rep,packed,name=Floats,proto3" json:"Floats,omitempty"` -} - -func (x *StoresKey) Reset() { - *x = StoresKey{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *StoresKey) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StoresKey) ProtoMessage() {} - -func (x *StoresKey) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StoresKey.ProtoReflect.Descriptor instead. -func (*StoresKey) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{0} -} - -func (x *StoresKey) GetFloats() []float32 { - if x != nil { - return x.Floats - } - return nil -} - -type StoresValue struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Bytes []byte `protobuf:"bytes,1,opt,name=Bytes,proto3" json:"Bytes,omitempty"` -} - -func (x *StoresValue) Reset() { - *x = StoresValue{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[1] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *StoresValue) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StoresValue) ProtoMessage() {} - -func (x *StoresValue) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[1] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StoresValue.ProtoReflect.Descriptor instead. -func (*StoresValue) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{1} -} - -func (x *StoresValue) GetBytes() []byte { - if x != nil { - return x.Bytes - } - return nil -} - -type StoresSetOptions struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"` - Values []*StoresValue `protobuf:"bytes,2,rep,name=Values,proto3" json:"Values,omitempty"` -} - -func (x *StoresSetOptions) Reset() { - *x = StoresSetOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[2] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *StoresSetOptions) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StoresSetOptions) ProtoMessage() {} - -func (x *StoresSetOptions) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[2] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StoresSetOptions.ProtoReflect.Descriptor instead. 
-func (*StoresSetOptions) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{2} -} - -func (x *StoresSetOptions) GetKeys() []*StoresKey { - if x != nil { - return x.Keys - } - return nil -} - -func (x *StoresSetOptions) GetValues() []*StoresValue { - if x != nil { - return x.Values - } - return nil -} - -type StoresDeleteOptions struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"` -} - -func (x *StoresDeleteOptions) Reset() { - *x = StoresDeleteOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[3] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *StoresDeleteOptions) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StoresDeleteOptions) ProtoMessage() {} - -func (x *StoresDeleteOptions) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[3] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StoresDeleteOptions.ProtoReflect.Descriptor instead. -func (*StoresDeleteOptions) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{3} -} - -func (x *StoresDeleteOptions) GetKeys() []*StoresKey { - if x != nil { - return x.Keys - } - return nil -} - -type StoresGetOptions struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"` -} - -func (x *StoresGetOptions) Reset() { - *x = StoresGetOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[4] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *StoresGetOptions) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StoresGetOptions) ProtoMessage() {} - -func (x *StoresGetOptions) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[4] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StoresGetOptions.ProtoReflect.Descriptor instead. 
-func (*StoresGetOptions) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{4} -} - -func (x *StoresGetOptions) GetKeys() []*StoresKey { - if x != nil { - return x.Keys - } - return nil -} - -type StoresGetResult struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"` - Values []*StoresValue `protobuf:"bytes,2,rep,name=Values,proto3" json:"Values,omitempty"` -} - -func (x *StoresGetResult) Reset() { - *x = StoresGetResult{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[5] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *StoresGetResult) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StoresGetResult) ProtoMessage() {} - -func (x *StoresGetResult) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[5] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StoresGetResult.ProtoReflect.Descriptor instead. -func (*StoresGetResult) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{5} -} - -func (x *StoresGetResult) GetKeys() []*StoresKey { - if x != nil { - return x.Keys - } - return nil -} - -func (x *StoresGetResult) GetValues() []*StoresValue { - if x != nil { - return x.Values - } - return nil -} - -type StoresFindOptions struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Key *StoresKey `protobuf:"bytes,1,opt,name=Key,proto3" json:"Key,omitempty"` - TopK int32 `protobuf:"varint,2,opt,name=TopK,proto3" json:"TopK,omitempty"` -} - -func (x *StoresFindOptions) Reset() { - *x = StoresFindOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[6] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *StoresFindOptions) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StoresFindOptions) ProtoMessage() {} - -func (x *StoresFindOptions) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[6] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StoresFindOptions.ProtoReflect.Descriptor instead. 
-func (*StoresFindOptions) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{6} -} - -func (x *StoresFindOptions) GetKey() *StoresKey { - if x != nil { - return x.Key - } - return nil -} - -func (x *StoresFindOptions) GetTopK() int32 { - if x != nil { - return x.TopK - } - return 0 -} - -type StoresFindResult struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"` - Values []*StoresValue `protobuf:"bytes,2,rep,name=Values,proto3" json:"Values,omitempty"` - Similarities []float32 `protobuf:"fixed32,3,rep,packed,name=Similarities,proto3" json:"Similarities,omitempty"` -} - -func (x *StoresFindResult) Reset() { - *x = StoresFindResult{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[7] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *StoresFindResult) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StoresFindResult) ProtoMessage() {} - -func (x *StoresFindResult) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[7] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StoresFindResult.ProtoReflect.Descriptor instead. -func (*StoresFindResult) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{7} -} - -func (x *StoresFindResult) GetKeys() []*StoresKey { - if x != nil { - return x.Keys - } - return nil -} - -func (x *StoresFindResult) GetValues() []*StoresValue { - if x != nil { - return x.Values - } - return nil -} - -func (x *StoresFindResult) GetSimilarities() []float32 { - if x != nil { - return x.Similarities - } - return nil -} - -type HealthMessage struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields -} - -func (x *HealthMessage) Reset() { - *x = HealthMessage{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[8] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *HealthMessage) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*HealthMessage) ProtoMessage() {} - -func (x *HealthMessage) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[8] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use HealthMessage.ProtoReflect.Descriptor instead. -func (*HealthMessage) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{8} -} - -// The request message containing the user's name. 
-type PredictOptions struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Prompt string `protobuf:"bytes,1,opt,name=Prompt,proto3" json:"Prompt,omitempty"` - Seed int32 `protobuf:"varint,2,opt,name=Seed,proto3" json:"Seed,omitempty"` - Threads int32 `protobuf:"varint,3,opt,name=Threads,proto3" json:"Threads,omitempty"` - Tokens int32 `protobuf:"varint,4,opt,name=Tokens,proto3" json:"Tokens,omitempty"` - TopK int32 `protobuf:"varint,5,opt,name=TopK,proto3" json:"TopK,omitempty"` - Repeat int32 `protobuf:"varint,6,opt,name=Repeat,proto3" json:"Repeat,omitempty"` - Batch int32 `protobuf:"varint,7,opt,name=Batch,proto3" json:"Batch,omitempty"` - NKeep int32 `protobuf:"varint,8,opt,name=NKeep,proto3" json:"NKeep,omitempty"` - Temperature float32 `protobuf:"fixed32,9,opt,name=Temperature,proto3" json:"Temperature,omitempty"` - Penalty float32 `protobuf:"fixed32,10,opt,name=Penalty,proto3" json:"Penalty,omitempty"` - F16KV bool `protobuf:"varint,11,opt,name=F16KV,proto3" json:"F16KV,omitempty"` - DebugMode bool `protobuf:"varint,12,opt,name=DebugMode,proto3" json:"DebugMode,omitempty"` - StopPrompts []string `protobuf:"bytes,13,rep,name=StopPrompts,proto3" json:"StopPrompts,omitempty"` - IgnoreEOS bool `protobuf:"varint,14,opt,name=IgnoreEOS,proto3" json:"IgnoreEOS,omitempty"` - TailFreeSamplingZ float32 `protobuf:"fixed32,15,opt,name=TailFreeSamplingZ,proto3" json:"TailFreeSamplingZ,omitempty"` - TypicalP float32 `protobuf:"fixed32,16,opt,name=TypicalP,proto3" json:"TypicalP,omitempty"` - FrequencyPenalty float32 `protobuf:"fixed32,17,opt,name=FrequencyPenalty,proto3" json:"FrequencyPenalty,omitempty"` - PresencePenalty float32 `protobuf:"fixed32,18,opt,name=PresencePenalty,proto3" json:"PresencePenalty,omitempty"` - Mirostat int32 `protobuf:"varint,19,opt,name=Mirostat,proto3" json:"Mirostat,omitempty"` - MirostatETA float32 `protobuf:"fixed32,20,opt,name=MirostatETA,proto3" json:"MirostatETA,omitempty"` - MirostatTAU float32 `protobuf:"fixed32,21,opt,name=MirostatTAU,proto3" json:"MirostatTAU,omitempty"` - PenalizeNL bool `protobuf:"varint,22,opt,name=PenalizeNL,proto3" json:"PenalizeNL,omitempty"` - LogitBias string `protobuf:"bytes,23,opt,name=LogitBias,proto3" json:"LogitBias,omitempty"` - MLock bool `protobuf:"varint,25,opt,name=MLock,proto3" json:"MLock,omitempty"` - MMap bool `protobuf:"varint,26,opt,name=MMap,proto3" json:"MMap,omitempty"` - PromptCacheAll bool `protobuf:"varint,27,opt,name=PromptCacheAll,proto3" json:"PromptCacheAll,omitempty"` - PromptCacheRO bool `protobuf:"varint,28,opt,name=PromptCacheRO,proto3" json:"PromptCacheRO,omitempty"` - Grammar string `protobuf:"bytes,29,opt,name=Grammar,proto3" json:"Grammar,omitempty"` - MainGPU string `protobuf:"bytes,30,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"` - TensorSplit string `protobuf:"bytes,31,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"` - TopP float32 `protobuf:"fixed32,32,opt,name=TopP,proto3" json:"TopP,omitempty"` - PromptCachePath string `protobuf:"bytes,33,opt,name=PromptCachePath,proto3" json:"PromptCachePath,omitempty"` - Debug bool `protobuf:"varint,34,opt,name=Debug,proto3" json:"Debug,omitempty"` - EmbeddingTokens []int32 `protobuf:"varint,35,rep,packed,name=EmbeddingTokens,proto3" json:"EmbeddingTokens,omitempty"` - Embeddings string `protobuf:"bytes,36,opt,name=Embeddings,proto3" json:"Embeddings,omitempty"` - RopeFreqBase float32 `protobuf:"fixed32,37,opt,name=RopeFreqBase,proto3" json:"RopeFreqBase,omitempty"` - RopeFreqScale 
float32 `protobuf:"fixed32,38,opt,name=RopeFreqScale,proto3" json:"RopeFreqScale,omitempty"` - NegativePromptScale float32 `protobuf:"fixed32,39,opt,name=NegativePromptScale,proto3" json:"NegativePromptScale,omitempty"` - NegativePrompt string `protobuf:"bytes,40,opt,name=NegativePrompt,proto3" json:"NegativePrompt,omitempty"` - NDraft int32 `protobuf:"varint,41,opt,name=NDraft,proto3" json:"NDraft,omitempty"` - Images []string `protobuf:"bytes,42,rep,name=Images,proto3" json:"Images,omitempty"` - UseTokenizerTemplate bool `protobuf:"varint,43,opt,name=UseTokenizerTemplate,proto3" json:"UseTokenizerTemplate,omitempty"` - Messages []*Message `protobuf:"bytes,44,rep,name=Messages,proto3" json:"Messages,omitempty"` -} - -func (x *PredictOptions) Reset() { - *x = PredictOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[9] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *PredictOptions) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*PredictOptions) ProtoMessage() {} - -func (x *PredictOptions) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[9] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use PredictOptions.ProtoReflect.Descriptor instead. -func (*PredictOptions) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{9} -} - -func (x *PredictOptions) GetPrompt() string { - if x != nil { - return x.Prompt - } - return "" -} - -func (x *PredictOptions) GetSeed() int32 { - if x != nil { - return x.Seed - } - return 0 -} - -func (x *PredictOptions) GetThreads() int32 { - if x != nil { - return x.Threads - } - return 0 -} - -func (x *PredictOptions) GetTokens() int32 { - if x != nil { - return x.Tokens - } - return 0 -} - -func (x *PredictOptions) GetTopK() int32 { - if x != nil { - return x.TopK - } - return 0 -} - -func (x *PredictOptions) GetRepeat() int32 { - if x != nil { - return x.Repeat - } - return 0 -} - -func (x *PredictOptions) GetBatch() int32 { - if x != nil { - return x.Batch - } - return 0 -} - -func (x *PredictOptions) GetNKeep() int32 { - if x != nil { - return x.NKeep - } - return 0 -} - -func (x *PredictOptions) GetTemperature() float32 { - if x != nil { - return x.Temperature - } - return 0 -} - -func (x *PredictOptions) GetPenalty() float32 { - if x != nil { - return x.Penalty - } - return 0 -} - -func (x *PredictOptions) GetF16KV() bool { - if x != nil { - return x.F16KV - } - return false -} - -func (x *PredictOptions) GetDebugMode() bool { - if x != nil { - return x.DebugMode - } - return false -} - -func (x *PredictOptions) GetStopPrompts() []string { - if x != nil { - return x.StopPrompts - } - return nil -} - -func (x *PredictOptions) GetIgnoreEOS() bool { - if x != nil { - return x.IgnoreEOS - } - return false -} - -func (x *PredictOptions) GetTailFreeSamplingZ() float32 { - if x != nil { - return x.TailFreeSamplingZ - } - return 0 -} - -func (x *PredictOptions) GetTypicalP() float32 { - if x != nil { - return x.TypicalP - } - return 0 -} - -func (x *PredictOptions) GetFrequencyPenalty() float32 { - if x != nil { - return x.FrequencyPenalty - } - return 0 -} - -func (x *PredictOptions) GetPresencePenalty() float32 { - if x != nil { - return x.PresencePenalty - } - return 0 -} - -func (x *PredictOptions) GetMirostat() int32 { - if x != nil 
{ - return x.Mirostat - } - return 0 -} - -func (x *PredictOptions) GetMirostatETA() float32 { - if x != nil { - return x.MirostatETA - } - return 0 -} - -func (x *PredictOptions) GetMirostatTAU() float32 { - if x != nil { - return x.MirostatTAU - } - return 0 -} - -func (x *PredictOptions) GetPenalizeNL() bool { - if x != nil { - return x.PenalizeNL - } - return false -} - -func (x *PredictOptions) GetLogitBias() string { - if x != nil { - return x.LogitBias - } - return "" -} - -func (x *PredictOptions) GetMLock() bool { - if x != nil { - return x.MLock - } - return false -} - -func (x *PredictOptions) GetMMap() bool { - if x != nil { - return x.MMap - } - return false -} - -func (x *PredictOptions) GetPromptCacheAll() bool { - if x != nil { - return x.PromptCacheAll - } - return false -} - -func (x *PredictOptions) GetPromptCacheRO() bool { - if x != nil { - return x.PromptCacheRO - } - return false -} - -func (x *PredictOptions) GetGrammar() string { - if x != nil { - return x.Grammar - } - return "" -} - -func (x *PredictOptions) GetMainGPU() string { - if x != nil { - return x.MainGPU - } - return "" -} - -func (x *PredictOptions) GetTensorSplit() string { - if x != nil { - return x.TensorSplit - } - return "" -} - -func (x *PredictOptions) GetTopP() float32 { - if x != nil { - return x.TopP - } - return 0 -} - -func (x *PredictOptions) GetPromptCachePath() string { - if x != nil { - return x.PromptCachePath - } - return "" -} - -func (x *PredictOptions) GetDebug() bool { - if x != nil { - return x.Debug - } - return false -} - -func (x *PredictOptions) GetEmbeddingTokens() []int32 { - if x != nil { - return x.EmbeddingTokens - } - return nil -} - -func (x *PredictOptions) GetEmbeddings() string { - if x != nil { - return x.Embeddings - } - return "" -} - -func (x *PredictOptions) GetRopeFreqBase() float32 { - if x != nil { - return x.RopeFreqBase - } - return 0 -} - -func (x *PredictOptions) GetRopeFreqScale() float32 { - if x != nil { - return x.RopeFreqScale - } - return 0 -} - -func (x *PredictOptions) GetNegativePromptScale() float32 { - if x != nil { - return x.NegativePromptScale - } - return 0 -} - -func (x *PredictOptions) GetNegativePrompt() string { - if x != nil { - return x.NegativePrompt - } - return "" -} - -func (x *PredictOptions) GetNDraft() int32 { - if x != nil { - return x.NDraft - } - return 0 -} - -func (x *PredictOptions) GetImages() []string { - if x != nil { - return x.Images - } - return nil -} - -func (x *PredictOptions) GetUseTokenizerTemplate() bool { - if x != nil { - return x.UseTokenizerTemplate - } - return false -} - -func (x *PredictOptions) GetMessages() []*Message { - if x != nil { - return x.Messages - } - return nil -} - -// The response message containing the result -type Reply struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Message []byte `protobuf:"bytes,1,opt,name=message,proto3" json:"message,omitempty"` -} - -func (x *Reply) Reset() { - *x = Reply{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[10] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *Reply) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*Reply) ProtoMessage() {} - -func (x *Reply) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[10] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - 
ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use Reply.ProtoReflect.Descriptor instead. -func (*Reply) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{10} -} - -func (x *Reply) GetMessage() []byte { - if x != nil { - return x.Message - } - return nil -} - -type ModelOptions struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Model string `protobuf:"bytes,1,opt,name=Model,proto3" json:"Model,omitempty"` - ContextSize int32 `protobuf:"varint,2,opt,name=ContextSize,proto3" json:"ContextSize,omitempty"` - Seed int32 `protobuf:"varint,3,opt,name=Seed,proto3" json:"Seed,omitempty"` - NBatch int32 `protobuf:"varint,4,opt,name=NBatch,proto3" json:"NBatch,omitempty"` - F16Memory bool `protobuf:"varint,5,opt,name=F16Memory,proto3" json:"F16Memory,omitempty"` - MLock bool `protobuf:"varint,6,opt,name=MLock,proto3" json:"MLock,omitempty"` - MMap bool `protobuf:"varint,7,opt,name=MMap,proto3" json:"MMap,omitempty"` - VocabOnly bool `protobuf:"varint,8,opt,name=VocabOnly,proto3" json:"VocabOnly,omitempty"` - LowVRAM bool `protobuf:"varint,9,opt,name=LowVRAM,proto3" json:"LowVRAM,omitempty"` - Embeddings bool `protobuf:"varint,10,opt,name=Embeddings,proto3" json:"Embeddings,omitempty"` - NUMA bool `protobuf:"varint,11,opt,name=NUMA,proto3" json:"NUMA,omitempty"` - NGPULayers int32 `protobuf:"varint,12,opt,name=NGPULayers,proto3" json:"NGPULayers,omitempty"` - MainGPU string `protobuf:"bytes,13,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"` - TensorSplit string `protobuf:"bytes,14,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"` - Threads int32 `protobuf:"varint,15,opt,name=Threads,proto3" json:"Threads,omitempty"` - LibrarySearchPath string `protobuf:"bytes,16,opt,name=LibrarySearchPath,proto3" json:"LibrarySearchPath,omitempty"` - RopeFreqBase float32 `protobuf:"fixed32,17,opt,name=RopeFreqBase,proto3" json:"RopeFreqBase,omitempty"` - RopeFreqScale float32 `protobuf:"fixed32,18,opt,name=RopeFreqScale,proto3" json:"RopeFreqScale,omitempty"` - RMSNormEps float32 `protobuf:"fixed32,19,opt,name=RMSNormEps,proto3" json:"RMSNormEps,omitempty"` - NGQA int32 `protobuf:"varint,20,opt,name=NGQA,proto3" json:"NGQA,omitempty"` - ModelFile string `protobuf:"bytes,21,opt,name=ModelFile,proto3" json:"ModelFile,omitempty"` - // AutoGPTQ - Device string `protobuf:"bytes,22,opt,name=Device,proto3" json:"Device,omitempty"` - UseTriton bool `protobuf:"varint,23,opt,name=UseTriton,proto3" json:"UseTriton,omitempty"` - ModelBaseName string `protobuf:"bytes,24,opt,name=ModelBaseName,proto3" json:"ModelBaseName,omitempty"` - UseFastTokenizer bool `protobuf:"varint,25,opt,name=UseFastTokenizer,proto3" json:"UseFastTokenizer,omitempty"` - // Diffusers - PipelineType string `protobuf:"bytes,26,opt,name=PipelineType,proto3" json:"PipelineType,omitempty"` - SchedulerType string `protobuf:"bytes,27,opt,name=SchedulerType,proto3" json:"SchedulerType,omitempty"` - CUDA bool `protobuf:"varint,28,opt,name=CUDA,proto3" json:"CUDA,omitempty"` - CFGScale float32 `protobuf:"fixed32,29,opt,name=CFGScale,proto3" json:"CFGScale,omitempty"` - IMG2IMG bool `protobuf:"varint,30,opt,name=IMG2IMG,proto3" json:"IMG2IMG,omitempty"` - CLIPModel string `protobuf:"bytes,31,opt,name=CLIPModel,proto3" json:"CLIPModel,omitempty"` - CLIPSubfolder string `protobuf:"bytes,32,opt,name=CLIPSubfolder,proto3" json:"CLIPSubfolder,omitempty"` - CLIPSkip int32 `protobuf:"varint,33,opt,name=CLIPSkip,proto3" 
json:"CLIPSkip,omitempty"` - ControlNet string `protobuf:"bytes,48,opt,name=ControlNet,proto3" json:"ControlNet,omitempty"` - Tokenizer string `protobuf:"bytes,34,opt,name=Tokenizer,proto3" json:"Tokenizer,omitempty"` - // LLM (llama.cpp) - LoraBase string `protobuf:"bytes,35,opt,name=LoraBase,proto3" json:"LoraBase,omitempty"` - LoraAdapter string `protobuf:"bytes,36,opt,name=LoraAdapter,proto3" json:"LoraAdapter,omitempty"` - LoraScale float32 `protobuf:"fixed32,42,opt,name=LoraScale,proto3" json:"LoraScale,omitempty"` - NoMulMatQ bool `protobuf:"varint,37,opt,name=NoMulMatQ,proto3" json:"NoMulMatQ,omitempty"` - DraftModel string `protobuf:"bytes,39,opt,name=DraftModel,proto3" json:"DraftModel,omitempty"` - AudioPath string `protobuf:"bytes,38,opt,name=AudioPath,proto3" json:"AudioPath,omitempty"` - // vllm - Quantization string `protobuf:"bytes,40,opt,name=Quantization,proto3" json:"Quantization,omitempty"` - GPUMemoryUtilization float32 `protobuf:"fixed32,50,opt,name=GPUMemoryUtilization,proto3" json:"GPUMemoryUtilization,omitempty"` - TrustRemoteCode bool `protobuf:"varint,51,opt,name=TrustRemoteCode,proto3" json:"TrustRemoteCode,omitempty"` - EnforceEager bool `protobuf:"varint,52,opt,name=EnforceEager,proto3" json:"EnforceEager,omitempty"` - SwapSpace int32 `protobuf:"varint,53,opt,name=SwapSpace,proto3" json:"SwapSpace,omitempty"` - MaxModelLen int32 `protobuf:"varint,54,opt,name=MaxModelLen,proto3" json:"MaxModelLen,omitempty"` - MMProj string `protobuf:"bytes,41,opt,name=MMProj,proto3" json:"MMProj,omitempty"` - RopeScaling string `protobuf:"bytes,43,opt,name=RopeScaling,proto3" json:"RopeScaling,omitempty"` - YarnExtFactor float32 `protobuf:"fixed32,44,opt,name=YarnExtFactor,proto3" json:"YarnExtFactor,omitempty"` - YarnAttnFactor float32 `protobuf:"fixed32,45,opt,name=YarnAttnFactor,proto3" json:"YarnAttnFactor,omitempty"` - YarnBetaFast float32 `protobuf:"fixed32,46,opt,name=YarnBetaFast,proto3" json:"YarnBetaFast,omitempty"` - YarnBetaSlow float32 `protobuf:"fixed32,47,opt,name=YarnBetaSlow,proto3" json:"YarnBetaSlow,omitempty"` - Type string `protobuf:"bytes,49,opt,name=Type,proto3" json:"Type,omitempty"` -} - -func (x *ModelOptions) Reset() { - *x = ModelOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[11] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *ModelOptions) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*ModelOptions) ProtoMessage() {} - -func (x *ModelOptions) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[11] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use ModelOptions.ProtoReflect.Descriptor instead. 
-func (*ModelOptions) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{11} -} - -func (x *ModelOptions) GetModel() string { - if x != nil { - return x.Model - } - return "" -} - -func (x *ModelOptions) GetContextSize() int32 { - if x != nil { - return x.ContextSize - } - return 0 -} - -func (x *ModelOptions) GetSeed() int32 { - if x != nil { - return x.Seed - } - return 0 -} - -func (x *ModelOptions) GetNBatch() int32 { - if x != nil { - return x.NBatch - } - return 0 -} - -func (x *ModelOptions) GetF16Memory() bool { - if x != nil { - return x.F16Memory - } - return false -} - -func (x *ModelOptions) GetMLock() bool { - if x != nil { - return x.MLock - } - return false -} - -func (x *ModelOptions) GetMMap() bool { - if x != nil { - return x.MMap - } - return false -} - -func (x *ModelOptions) GetVocabOnly() bool { - if x != nil { - return x.VocabOnly - } - return false -} - -func (x *ModelOptions) GetLowVRAM() bool { - if x != nil { - return x.LowVRAM - } - return false -} - -func (x *ModelOptions) GetEmbeddings() bool { - if x != nil { - return x.Embeddings - } - return false -} - -func (x *ModelOptions) GetNUMA() bool { - if x != nil { - return x.NUMA - } - return false -} - -func (x *ModelOptions) GetNGPULayers() int32 { - if x != nil { - return x.NGPULayers - } - return 0 -} - -func (x *ModelOptions) GetMainGPU() string { - if x != nil { - return x.MainGPU - } - return "" -} - -func (x *ModelOptions) GetTensorSplit() string { - if x != nil { - return x.TensorSplit - } - return "" -} - -func (x *ModelOptions) GetThreads() int32 { - if x != nil { - return x.Threads - } - return 0 -} - -func (x *ModelOptions) GetLibrarySearchPath() string { - if x != nil { - return x.LibrarySearchPath - } - return "" -} - -func (x *ModelOptions) GetRopeFreqBase() float32 { - if x != nil { - return x.RopeFreqBase - } - return 0 -} - -func (x *ModelOptions) GetRopeFreqScale() float32 { - if x != nil { - return x.RopeFreqScale - } - return 0 -} - -func (x *ModelOptions) GetRMSNormEps() float32 { - if x != nil { - return x.RMSNormEps - } - return 0 -} - -func (x *ModelOptions) GetNGQA() int32 { - if x != nil { - return x.NGQA - } - return 0 -} - -func (x *ModelOptions) GetModelFile() string { - if x != nil { - return x.ModelFile - } - return "" -} - -func (x *ModelOptions) GetDevice() string { - if x != nil { - return x.Device - } - return "" -} - -func (x *ModelOptions) GetUseTriton() bool { - if x != nil { - return x.UseTriton - } - return false -} - -func (x *ModelOptions) GetModelBaseName() string { - if x != nil { - return x.ModelBaseName - } - return "" -} - -func (x *ModelOptions) GetUseFastTokenizer() bool { - if x != nil { - return x.UseFastTokenizer - } - return false -} - -func (x *ModelOptions) GetPipelineType() string { - if x != nil { - return x.PipelineType - } - return "" -} - -func (x *ModelOptions) GetSchedulerType() string { - if x != nil { - return x.SchedulerType - } - return "" -} - -func (x *ModelOptions) GetCUDA() bool { - if x != nil { - return x.CUDA - } - return false -} - -func (x *ModelOptions) GetCFGScale() float32 { - if x != nil { - return x.CFGScale - } - return 0 -} - -func (x *ModelOptions) GetIMG2IMG() bool { - if x != nil { - return x.IMG2IMG - } - return false -} - -func (x *ModelOptions) GetCLIPModel() string { - if x != nil { - return x.CLIPModel - } - return "" -} - -func (x *ModelOptions) GetCLIPSubfolder() string { - if x != nil { - return x.CLIPSubfolder - } - return "" -} - -func (x *ModelOptions) GetCLIPSkip() int32 { - if x != nil 
{ - return x.CLIPSkip - } - return 0 -} - -func (x *ModelOptions) GetControlNet() string { - if x != nil { - return x.ControlNet - } - return "" -} - -func (x *ModelOptions) GetTokenizer() string { - if x != nil { - return x.Tokenizer - } - return "" -} - -func (x *ModelOptions) GetLoraBase() string { - if x != nil { - return x.LoraBase - } - return "" -} - -func (x *ModelOptions) GetLoraAdapter() string { - if x != nil { - return x.LoraAdapter - } - return "" -} - -func (x *ModelOptions) GetLoraScale() float32 { - if x != nil { - return x.LoraScale - } - return 0 -} - -func (x *ModelOptions) GetNoMulMatQ() bool { - if x != nil { - return x.NoMulMatQ - } - return false -} - -func (x *ModelOptions) GetDraftModel() string { - if x != nil { - return x.DraftModel - } - return "" -} - -func (x *ModelOptions) GetAudioPath() string { - if x != nil { - return x.AudioPath - } - return "" -} - -func (x *ModelOptions) GetQuantization() string { - if x != nil { - return x.Quantization - } - return "" -} - -func (x *ModelOptions) GetGPUMemoryUtilization() float32 { - if x != nil { - return x.GPUMemoryUtilization - } - return 0 -} - -func (x *ModelOptions) GetTrustRemoteCode() bool { - if x != nil { - return x.TrustRemoteCode - } - return false -} - -func (x *ModelOptions) GetEnforceEager() bool { - if x != nil { - return x.EnforceEager - } - return false -} - -func (x *ModelOptions) GetSwapSpace() int32 { - if x != nil { - return x.SwapSpace - } - return 0 -} - -func (x *ModelOptions) GetMaxModelLen() int32 { - if x != nil { - return x.MaxModelLen - } - return 0 -} - -func (x *ModelOptions) GetMMProj() string { - if x != nil { - return x.MMProj - } - return "" -} - -func (x *ModelOptions) GetRopeScaling() string { - if x != nil { - return x.RopeScaling - } - return "" -} - -func (x *ModelOptions) GetYarnExtFactor() float32 { - if x != nil { - return x.YarnExtFactor - } - return 0 -} - -func (x *ModelOptions) GetYarnAttnFactor() float32 { - if x != nil { - return x.YarnAttnFactor - } - return 0 -} - -func (x *ModelOptions) GetYarnBetaFast() float32 { - if x != nil { - return x.YarnBetaFast - } - return 0 -} - -func (x *ModelOptions) GetYarnBetaSlow() float32 { - if x != nil { - return x.YarnBetaSlow - } - return 0 -} - -func (x *ModelOptions) GetType() string { - if x != nil { - return x.Type - } - return "" -} - -type Result struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Message string `protobuf:"bytes,1,opt,name=message,proto3" json:"message,omitempty"` - Success bool `protobuf:"varint,2,opt,name=success,proto3" json:"success,omitempty"` -} - -func (x *Result) Reset() { - *x = Result{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[12] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *Result) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*Result) ProtoMessage() {} - -func (x *Result) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[12] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use Result.ProtoReflect.Descriptor instead. 
-func (*Result) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{12} -} - -func (x *Result) GetMessage() string { - if x != nil { - return x.Message - } - return "" -} - -func (x *Result) GetSuccess() bool { - if x != nil { - return x.Success - } - return false -} - -type EmbeddingResult struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Embeddings []float32 `protobuf:"fixed32,1,rep,packed,name=embeddings,proto3" json:"embeddings,omitempty"` -} - -func (x *EmbeddingResult) Reset() { - *x = EmbeddingResult{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[13] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *EmbeddingResult) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*EmbeddingResult) ProtoMessage() {} - -func (x *EmbeddingResult) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[13] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use EmbeddingResult.ProtoReflect.Descriptor instead. -func (*EmbeddingResult) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{13} -} - -func (x *EmbeddingResult) GetEmbeddings() []float32 { - if x != nil { - return x.Embeddings - } - return nil -} - -type TranscriptRequest struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Dst string `protobuf:"bytes,2,opt,name=dst,proto3" json:"dst,omitempty"` - Language string `protobuf:"bytes,3,opt,name=language,proto3" json:"language,omitempty"` - Threads uint32 `protobuf:"varint,4,opt,name=threads,proto3" json:"threads,omitempty"` -} - -func (x *TranscriptRequest) Reset() { - *x = TranscriptRequest{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[14] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *TranscriptRequest) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*TranscriptRequest) ProtoMessage() {} - -func (x *TranscriptRequest) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[14] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use TranscriptRequest.ProtoReflect.Descriptor instead. 
-func (*TranscriptRequest) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{14} -} - -func (x *TranscriptRequest) GetDst() string { - if x != nil { - return x.Dst - } - return "" -} - -func (x *TranscriptRequest) GetLanguage() string { - if x != nil { - return x.Language - } - return "" -} - -func (x *TranscriptRequest) GetThreads() uint32 { - if x != nil { - return x.Threads - } - return 0 -} - -type TranscriptResult struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Segments []*TranscriptSegment `protobuf:"bytes,1,rep,name=segments,proto3" json:"segments,omitempty"` - Text string `protobuf:"bytes,2,opt,name=text,proto3" json:"text,omitempty"` -} - -func (x *TranscriptResult) Reset() { - *x = TranscriptResult{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[15] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *TranscriptResult) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*TranscriptResult) ProtoMessage() {} - -func (x *TranscriptResult) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[15] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use TranscriptResult.ProtoReflect.Descriptor instead. -func (*TranscriptResult) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{15} -} - -func (x *TranscriptResult) GetSegments() []*TranscriptSegment { - if x != nil { - return x.Segments - } - return nil -} - -func (x *TranscriptResult) GetText() string { - if x != nil { - return x.Text - } - return "" -} - -type TranscriptSegment struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Id int32 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"` - Start int64 `protobuf:"varint,2,opt,name=start,proto3" json:"start,omitempty"` - End int64 `protobuf:"varint,3,opt,name=end,proto3" json:"end,omitempty"` - Text string `protobuf:"bytes,4,opt,name=text,proto3" json:"text,omitempty"` - Tokens []int32 `protobuf:"varint,5,rep,packed,name=tokens,proto3" json:"tokens,omitempty"` -} - -func (x *TranscriptSegment) Reset() { - *x = TranscriptSegment{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[16] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *TranscriptSegment) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*TranscriptSegment) ProtoMessage() {} - -func (x *TranscriptSegment) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[16] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use TranscriptSegment.ProtoReflect.Descriptor instead. 
-func (*TranscriptSegment) Descriptor() ([]byte, []int) {
-	return file_backend_proto_rawDescGZIP(), []int{16}
-}
-
-func (x *TranscriptSegment) GetId() int32 {
-	if x != nil {
-		return x.Id
-	}
-	return 0
-}
-
-func (x *TranscriptSegment) GetStart() int64 {
-	if x != nil {
-		return x.Start
-	}
-	return 0
-}
-
-func (x *TranscriptSegment) GetEnd() int64 {
-	if x != nil {
-		return x.End
-	}
-	return 0
-}
-
-func (x *TranscriptSegment) GetText() string {
-	if x != nil {
-		return x.Text
-	}
-	return ""
-}
-
-func (x *TranscriptSegment) GetTokens() []int32 {
-	if x != nil {
-		return x.Tokens
-	}
-	return nil
-}
-
-type GenerateImageRequest struct {
-	state         protoimpl.MessageState
-	sizeCache     protoimpl.SizeCache
-	unknownFields protoimpl.UnknownFields
-
-	Height         int32  `protobuf:"varint,1,opt,name=height,proto3" json:"height,omitempty"`
-	Width          int32  `protobuf:"varint,2,opt,name=width,proto3" json:"width,omitempty"`
-	Mode           int32  `protobuf:"varint,3,opt,name=mode,proto3" json:"mode,omitempty"`
-	Step           int32  `protobuf:"varint,4,opt,name=step,proto3" json:"step,omitempty"`
-	Seed           int32  `protobuf:"varint,5,opt,name=seed,proto3" json:"seed,omitempty"`
-	PositivePrompt string `protobuf:"bytes,6,opt,name=positive_prompt,json=positivePrompt,proto3" json:"positive_prompt,omitempty"`
-	NegativePrompt string `protobuf:"bytes,7,opt,name=negative_prompt,json=negativePrompt,proto3" json:"negative_prompt,omitempty"`
-	Dst            string `protobuf:"bytes,8,opt,name=dst,proto3" json:"dst,omitempty"`
-	Src            string `protobuf:"bytes,9,opt,name=src,proto3" json:"src,omitempty"`
-	// Diffusers
-	EnableParameters string `protobuf:"bytes,10,opt,name=EnableParameters,proto3" json:"EnableParameters,omitempty"`
-	CLIPSkip         int32  `protobuf:"varint,11,opt,name=CLIPSkip,proto3" json:"CLIPSkip,omitempty"`
-}
-
-func (x *GenerateImageRequest) Reset() {
-	*x = GenerateImageRequest{}
-	if protoimpl.UnsafeEnabled {
-		mi := &file_backend_proto_msgTypes[17]
-		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
-		ms.StoreMessageInfo(mi)
-	}
-}
-
-func (x *GenerateImageRequest) String() string {
-	return protoimpl.X.MessageStringOf(x)
-}
-
-func (*GenerateImageRequest) ProtoMessage() {}
-
-func (x *GenerateImageRequest) ProtoReflect() protoreflect.Message {
-	mi := &file_backend_proto_msgTypes[17]
-	if protoimpl.UnsafeEnabled && x != nil {
-		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
-		if ms.LoadMessageInfo() == nil {
-			ms.StoreMessageInfo(mi)
-		}
-		return ms
-	}
-	return mi.MessageOf(x)
-}
-
-// Deprecated: Use GenerateImageRequest.ProtoReflect.Descriptor instead.
-func (*GenerateImageRequest) Descriptor() ([]byte, []int) {
-	return file_backend_proto_rawDescGZIP(), []int{17}
-}
-
-func (x *GenerateImageRequest) GetHeight() int32 {
-	if x != nil {
-		return x.Height
-	}
-	return 0
-}
-
-func (x *GenerateImageRequest) GetWidth() int32 {
-	if x != nil {
-		return x.Width
-	}
-	return 0
-}
-
-func (x *GenerateImageRequest) GetMode() int32 {
-	if x != nil {
-		return x.Mode
-	}
-	return 0
-}
-
-func (x *GenerateImageRequest) GetStep() int32 {
-	if x != nil {
-		return x.Step
-	}
-	return 0
-}
-
-func (x *GenerateImageRequest) GetSeed() int32 {
-	if x != nil {
-		return x.Seed
-	}
-	return 0
-}
-
-func (x *GenerateImageRequest) GetPositivePrompt() string {
-	if x != nil {
-		return x.PositivePrompt
-	}
-	return ""
-}
-
-func (x *GenerateImageRequest) GetNegativePrompt() string {
-	if x != nil {
-		return x.NegativePrompt
-	}
-	return ""
-}
-
-func (x *GenerateImageRequest) GetDst() string {
-	if x != nil {
-		return x.Dst
-	}
-	return ""
-}
-
-func (x *GenerateImageRequest) GetSrc() string {
-	if x != nil {
-		return x.Src
-	}
-	return ""
-}
-
-func (x *GenerateImageRequest) GetEnableParameters() string {
-	if x != nil {
-		return x.EnableParameters
-	}
-	return ""
-}
-
-func (x *GenerateImageRequest) GetCLIPSkip() int32 {
-	if x != nil {
-		return x.CLIPSkip
-	}
-	return 0
-}
-
-type TTSRequest struct {
-	state         protoimpl.MessageState
-	sizeCache     protoimpl.SizeCache
-	unknownFields protoimpl.UnknownFields
-
-	Text  string `protobuf:"bytes,1,opt,name=text,proto3" json:"text,omitempty"`
-	Model string `protobuf:"bytes,2,opt,name=model,proto3" json:"model,omitempty"`
-	Dst   string `protobuf:"bytes,3,opt,name=dst,proto3" json:"dst,omitempty"`
-	Voice string `protobuf:"bytes,4,opt,name=voice,proto3" json:"voice,omitempty"`
-}
-
-func (x *TTSRequest) Reset() {
-	*x = TTSRequest{}
-	if protoimpl.UnsafeEnabled {
-		mi := &file_backend_proto_msgTypes[18]
-		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
-		ms.StoreMessageInfo(mi)
-	}
-}
-
-func (x *TTSRequest) String() string {
-	return protoimpl.X.MessageStringOf(x)
-}
-
-func (*TTSRequest) ProtoMessage() {}
-
-func (x *TTSRequest) ProtoReflect() protoreflect.Message {
-	mi := &file_backend_proto_msgTypes[18]
-	if protoimpl.UnsafeEnabled && x != nil {
-		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
-		if ms.LoadMessageInfo() == nil {
-			ms.StoreMessageInfo(mi)
-		}
-		return ms
-	}
-	return mi.MessageOf(x)
-}
-
-// Deprecated: Use TTSRequest.ProtoReflect.Descriptor instead.
-func (*TTSRequest) Descriptor() ([]byte, []int) {
-	return file_backend_proto_rawDescGZIP(), []int{18}
-}
-
-func (x *TTSRequest) GetText() string {
-	if x != nil {
-		return x.Text
-	}
-	return ""
-}
-
-func (x *TTSRequest) GetModel() string {
-	if x != nil {
-		return x.Model
-	}
-	return ""
-}
-
-func (x *TTSRequest) GetDst() string {
-	if x != nil {
-		return x.Dst
-	}
-	return ""
-}
-
-func (x *TTSRequest) GetVoice() string {
-	if x != nil {
-		return x.Voice
-	}
-	return ""
-}
-
-type TokenizationResponse struct {
-	state         protoimpl.MessageState
-	sizeCache     protoimpl.SizeCache
-	unknownFields protoimpl.UnknownFields
-
-	Length int32   `protobuf:"varint,1,opt,name=length,proto3" json:"length,omitempty"`
-	Tokens []int32 `protobuf:"varint,2,rep,packed,name=tokens,proto3" json:"tokens,omitempty"`
-}
-
-func (x *TokenizationResponse) Reset() {
-	*x = TokenizationResponse{}
-	if protoimpl.UnsafeEnabled {
-		mi := &file_backend_proto_msgTypes[19]
-		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
-		ms.StoreMessageInfo(mi)
-	}
-}
-
-func (x *TokenizationResponse) String() string {
-	return protoimpl.X.MessageStringOf(x)
-}
-
-func (*TokenizationResponse) ProtoMessage() {}
-
-func (x *TokenizationResponse) ProtoReflect() protoreflect.Message {
-	mi := &file_backend_proto_msgTypes[19]
-	if protoimpl.UnsafeEnabled && x != nil {
-		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
-		if ms.LoadMessageInfo() == nil {
-			ms.StoreMessageInfo(mi)
-		}
-		return ms
-	}
-	return mi.MessageOf(x)
-}
-
-// Deprecated: Use TokenizationResponse.ProtoReflect.Descriptor instead.
-func (*TokenizationResponse) Descriptor() ([]byte, []int) {
-	return file_backend_proto_rawDescGZIP(), []int{19}
-}
-
-func (x *TokenizationResponse) GetLength() int32 {
-	if x != nil {
-		return x.Length
-	}
-	return 0
-}
-
-func (x *TokenizationResponse) GetTokens() []int32 {
-	if x != nil {
-		return x.Tokens
-	}
-	return nil
-}
-
-type MemoryUsageData struct {
-	state         protoimpl.MessageState
-	sizeCache     protoimpl.SizeCache
-	unknownFields protoimpl.UnknownFields
-
-	Total     uint64            `protobuf:"varint,1,opt,name=total,proto3" json:"total,omitempty"`
-	Breakdown map[string]uint64 `protobuf:"bytes,2,rep,name=breakdown,proto3" json:"breakdown,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"varint,2,opt,name=value,proto3"`
-}
-
-func (x *MemoryUsageData) Reset() {
-	*x = MemoryUsageData{}
-	if protoimpl.UnsafeEnabled {
-		mi := &file_backend_proto_msgTypes[20]
-		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
-		ms.StoreMessageInfo(mi)
-	}
-}
-
-func (x *MemoryUsageData) String() string {
-	return protoimpl.X.MessageStringOf(x)
-}
-
-func (*MemoryUsageData) ProtoMessage() {}
-
-func (x *MemoryUsageData) ProtoReflect() protoreflect.Message {
-	mi := &file_backend_proto_msgTypes[20]
-	if protoimpl.UnsafeEnabled && x != nil {
-		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
-		if ms.LoadMessageInfo() == nil {
-			ms.StoreMessageInfo(mi)
-		}
-		return ms
-	}
-	return mi.MessageOf(x)
-}
-
-// Deprecated: Use MemoryUsageData.ProtoReflect.Descriptor instead.
-func (*MemoryUsageData) Descriptor() ([]byte, []int) {
-	return file_backend_proto_rawDescGZIP(), []int{20}
-}
-
-func (x *MemoryUsageData) GetTotal() uint64 {
-	if x != nil {
-		return x.Total
-	}
-	return 0
-}
-
-func (x *MemoryUsageData) GetBreakdown() map[string]uint64 {
-	if x != nil {
-		return x.Breakdown
-	}
-	return nil
-}
-
-type StatusResponse struct {
-	state         protoimpl.MessageState
-	sizeCache     protoimpl.SizeCache
-	unknownFields protoimpl.UnknownFields
-
-	State  StatusResponse_State `protobuf:"varint,1,opt,name=state,proto3,enum=backend.StatusResponse_State" json:"state,omitempty"`
-	Memory *MemoryUsageData     `protobuf:"bytes,2,opt,name=memory,proto3" json:"memory,omitempty"`
-}
-
-func (x *StatusResponse) Reset() {
-	*x = StatusResponse{}
-	if protoimpl.UnsafeEnabled {
-		mi := &file_backend_proto_msgTypes[21]
-		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
-		ms.StoreMessageInfo(mi)
-	}
-}
-
-func (x *StatusResponse) String() string {
-	return protoimpl.X.MessageStringOf(x)
-}
-
-func (*StatusResponse) ProtoMessage() {}
-
-func (x *StatusResponse) ProtoReflect() protoreflect.Message {
-	mi := &file_backend_proto_msgTypes[21]
-	if protoimpl.UnsafeEnabled && x != nil {
-		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
-		if ms.LoadMessageInfo() == nil {
-			ms.StoreMessageInfo(mi)
-		}
-		return ms
-	}
-	return mi.MessageOf(x)
-}
-
-// Deprecated: Use StatusResponse.ProtoReflect.Descriptor instead.
-func (*StatusResponse) Descriptor() ([]byte, []int) {
-	return file_backend_proto_rawDescGZIP(), []int{21}
-}
-
-func (x *StatusResponse) GetState() StatusResponse_State {
-	if x != nil {
-		return x.State
-	}
-	return StatusResponse_UNINITIALIZED
-}
-
-func (x *StatusResponse) GetMemory() *MemoryUsageData {
-	if x != nil {
-		return x.Memory
-	}
-	return nil
-}
-
-type Message struct {
-	state         protoimpl.MessageState
-	sizeCache     protoimpl.SizeCache
-	unknownFields protoimpl.UnknownFields
-
-	Role    string `protobuf:"bytes,1,opt,name=role,proto3" json:"role,omitempty"`
-	Content string `protobuf:"bytes,2,opt,name=content,proto3" json:"content,omitempty"`
-}
-
-func (x *Message) Reset() {
-	*x = Message{}
-	if protoimpl.UnsafeEnabled {
-		mi := &file_backend_proto_msgTypes[22]
-		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
-		ms.StoreMessageInfo(mi)
-	}
-}
-
-func (x *Message) String() string {
-	return protoimpl.X.MessageStringOf(x)
-}
-
-func (*Message) ProtoMessage() {}
-
-func (x *Message) ProtoReflect() protoreflect.Message {
-	mi := &file_backend_proto_msgTypes[22]
-	if protoimpl.UnsafeEnabled && x != nil {
-		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
-		if ms.LoadMessageInfo() == nil {
-			ms.StoreMessageInfo(mi)
-		}
-		return ms
-	}
-	return mi.MessageOf(x)
-}
-
-// Deprecated: Use Message.ProtoReflect.Descriptor instead.
-func (*Message) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{22} -} - -func (x *Message) GetRole() string { - if x != nil { - return x.Role - } - return "" -} - -func (x *Message) GetContent() string { - if x != nil { - return x.Content - } - return "" -} - -var File_backend_proto protoreflect.FileDescriptor - -var file_backend_proto_rawDesc = []byte{ - 0x0a, 0x0d, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, - 0x07, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x22, 0x23, 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72, - 0x65, 0x73, 0x4b, 0x65, 0x79, 0x12, 0x16, 0x0a, 0x06, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x73, 0x18, - 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x06, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x73, 0x22, 0x23, 0x0a, - 0x0b, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x14, 0x0a, 0x05, - 0x42, 0x79, 0x74, 0x65, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x05, 0x42, 0x79, 0x74, - 0x65, 0x73, 0x22, 0x68, 0x0a, 0x10, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, 0x4f, - 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, - 0x74, 0x6f, 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x12, 0x2c, - 0x0a, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, - 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x56, - 0x61, 0x6c, 0x75, 0x65, 0x52, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x22, 0x3d, 0x0a, 0x13, - 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x4f, 0x70, 0x74, 0x69, - 0x6f, 0x6e, 0x73, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, - 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x22, 0x3a, 0x0a, 0x10, 0x53, - 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, - 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x4b, 0x65, - 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x22, 0x67, 0x0a, 0x0f, 0x53, 0x74, 0x6f, 0x72, 0x65, - 0x73, 0x47, 0x65, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65, - 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65, - 0x79, 0x73, 0x12, 0x2c, 0x0a, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, - 0x72, 0x65, 0x73, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, - 0x22, 0x4d, 0x0a, 0x11, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x4f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x24, 0x0a, 0x03, 0x4b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, - 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x03, 0x4b, 0x65, 0x79, 0x12, 0x12, 0x0a, 0x04, 0x54, - 0x6f, 0x70, 0x4b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x22, - 0x8c, 0x01, 0x0a, 0x10, 
0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x52, 0x65, - 0x73, 0x75, 0x6c, 0x74, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, - 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x12, 0x2c, 0x0a, 0x06, - 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x56, 0x61, 0x6c, - 0x75, 0x65, 0x52, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x53, 0x69, - 0x6d, 0x69, 0x6c, 0x61, 0x72, 0x69, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x02, - 0x52, 0x0c, 0x53, 0x69, 0x6d, 0x69, 0x6c, 0x61, 0x72, 0x69, 0x74, 0x69, 0x65, 0x73, 0x22, 0x0f, - 0x0a, 0x0d, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, - 0xd6, 0x0a, 0x0a, 0x0e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, - 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x18, - 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x54, 0x6f, 0x6b, 0x65, - 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, - 0x12, 0x12, 0x0a, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, - 0x54, 0x6f, 0x70, 0x4b, 0x12, 0x16, 0x0a, 0x06, 0x52, 0x65, 0x70, 0x65, 0x61, 0x74, 0x18, 0x06, - 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x52, 0x65, 0x70, 0x65, 0x61, 0x74, 0x12, 0x14, 0x0a, 0x05, - 0x42, 0x61, 0x74, 0x63, 0x68, 0x18, 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x42, 0x61, 0x74, - 0x63, 0x68, 0x12, 0x14, 0x0a, 0x05, 0x4e, 0x4b, 0x65, 0x65, 0x70, 0x18, 0x08, 0x20, 0x01, 0x28, - 0x05, 0x52, 0x05, 0x4e, 0x4b, 0x65, 0x65, 0x70, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6d, 0x70, - 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, 0x65, 0x18, 0x09, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x54, - 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x50, 0x65, - 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x02, 0x52, 0x07, 0x50, 0x65, 0x6e, - 0x61, 0x6c, 0x74, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x46, 0x31, 0x36, 0x4b, 0x56, 0x18, 0x0b, 0x20, - 0x01, 0x28, 0x08, 0x52, 0x05, 0x46, 0x31, 0x36, 0x4b, 0x56, 0x12, 0x1c, 0x0a, 0x09, 0x44, 0x65, - 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64, 0x65, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x44, - 0x65, 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x53, 0x74, 0x6f, 0x70, - 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x73, 0x18, 0x0d, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x53, - 0x74, 0x6f, 0x70, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x49, 0x67, - 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f, 0x53, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x49, - 0x67, 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f, 0x53, 0x12, 0x2c, 0x0a, 0x11, 0x54, 0x61, 0x69, 0x6c, - 0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x5a, 0x18, 0x0f, 0x20, - 0x01, 0x28, 0x02, 0x52, 0x11, 0x54, 0x61, 0x69, 0x6c, 0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d, - 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x5a, 0x12, 0x1a, 
0x0a, 0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61, - 0x6c, 0x50, 0x18, 0x10, 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61, - 0x6c, 0x50, 0x12, 0x2a, 0x0a, 0x10, 0x46, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50, - 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x10, 0x46, 0x72, - 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x28, - 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, - 0x79, 0x18, 0x12, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0f, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, - 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x1a, 0x0a, 0x08, 0x4d, 0x69, 0x72, 0x6f, - 0x73, 0x74, 0x61, 0x74, 0x18, 0x13, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x4d, 0x69, 0x72, 0x6f, - 0x73, 0x74, 0x61, 0x74, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, - 0x45, 0x54, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, - 0x74, 0x61, 0x74, 0x45, 0x54, 0x41, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, - 0x61, 0x74, 0x54, 0x41, 0x55, 0x18, 0x15, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x4d, 0x69, 0x72, - 0x6f, 0x73, 0x74, 0x61, 0x74, 0x54, 0x41, 0x55, 0x12, 0x1e, 0x0a, 0x0a, 0x50, 0x65, 0x6e, 0x61, - 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c, 0x18, 0x16, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x50, 0x65, - 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c, 0x12, 0x1c, 0x0a, 0x09, 0x4c, 0x6f, 0x67, 0x69, - 0x74, 0x42, 0x69, 0x61, 0x73, 0x18, 0x17, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x4c, 0x6f, 0x67, - 0x69, 0x74, 0x42, 0x69, 0x61, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, - 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, - 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, - 0x12, 0x26, 0x0a, 0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x41, - 0x6c, 0x6c, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, - 0x43, 0x61, 0x63, 0x68, 0x65, 0x41, 0x6c, 0x6c, 0x12, 0x24, 0x0a, 0x0d, 0x50, 0x72, 0x6f, 0x6d, - 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x52, 0x4f, 0x18, 0x1c, 0x20, 0x01, 0x28, 0x08, 0x52, - 0x0d, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x52, 0x4f, 0x12, 0x18, - 0x0a, 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, - 0x47, 0x50, 0x55, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, - 0x50, 0x55, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, - 0x74, 0x18, 0x1f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, - 0x70, 0x6c, 0x69, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x6f, 0x70, 0x50, 0x18, 0x20, 0x20, 0x01, - 0x28, 0x02, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x50, 0x12, 0x28, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x6d, - 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x50, 0x61, 0x74, 0x68, 0x18, 0x21, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0f, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x50, 0x61, - 0x74, 0x68, 0x12, 0x14, 0x0a, 0x05, 0x44, 0x65, 0x62, 0x75, 0x67, 0x18, 0x22, 0x20, 0x01, 0x28, - 0x08, 0x52, 0x05, 0x44, 0x65, 0x62, 0x75, 0x67, 0x12, 0x28, 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, - 0x64, 0x64, 0x69, 0x6e, 0x67, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 
0x23, 0x20, 0x03, 0x28, - 0x05, 0x52, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x54, 0x6f, 0x6b, 0x65, - 0x6e, 0x73, 0x12, 0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, - 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, - 0x67, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, - 0x73, 0x65, 0x18, 0x25, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, - 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, - 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x26, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x52, - 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x30, 0x0a, 0x13, - 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x53, 0x63, - 0x61, 0x6c, 0x65, 0x18, 0x27, 0x20, 0x01, 0x28, 0x02, 0x52, 0x13, 0x4e, 0x65, 0x67, 0x61, 0x74, - 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x26, - 0x0a, 0x0e, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, - 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, - 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, - 0x18, 0x29, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, 0x12, 0x16, - 0x0a, 0x06, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x18, 0x2a, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, - 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x12, 0x32, 0x0a, 0x14, 0x55, 0x73, 0x65, 0x54, 0x6f, 0x6b, - 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x18, 0x2b, - 0x20, 0x01, 0x28, 0x08, 0x52, 0x14, 0x55, 0x73, 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, - 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x2c, 0x0a, 0x08, 0x4d, 0x65, - 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x18, 0x2c, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x10, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x52, 0x08, - 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, - 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x0c, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xa2, 0x0d, 0x0a, 0x0c, - 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, - 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, - 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, - 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, - 0x53, 0x69, 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x42, 0x61, 0x74, - 0x63, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68, - 0x12, 0x1c, 0x0a, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x05, 0x20, - 0x01, 0x28, 0x08, 0x52, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12, 0x14, - 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, - 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x07, 0x20, 0x01, - 
0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12, 0x1c, 0x0a, 0x09, 0x56, 0x6f, 0x63, 0x61, - 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x56, 0x6f, 0x63, - 0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, - 0x4d, 0x18, 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, - 0x12, 0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x0a, - 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, - 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x55, 0x4d, 0x41, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, - 0x4e, 0x55, 0x4d, 0x41, 0x12, 0x1e, 0x0a, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65, - 0x72, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, - 0x79, 0x65, 0x72, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x18, - 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12, 0x20, - 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, - 0x12, 0x18, 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x0f, 0x20, 0x01, 0x28, - 0x05, 0x52, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x2c, 0x0a, 0x11, 0x4c, 0x69, - 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x18, - 0x10, 0x20, 0x01, 0x28, 0x09, 0x52, 0x11, 0x4c, 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, - 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65, - 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, - 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d, - 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x12, 0x20, - 0x01, 0x28, 0x02, 0x52, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, - 0x6c, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, - 0x18, 0x13, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, - 0x70, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, - 0x52, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x12, 0x1c, 0x0a, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, - 0x69, 0x6c, 0x65, 0x18, 0x15, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, - 0x46, 0x69, 0x6c, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x18, 0x16, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x12, 0x1c, 0x0a, 0x09, - 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52, - 0x09, 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x4d, 0x6f, - 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x18, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, - 0x12, 0x2a, 0x0a, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, - 0x69, 0x7a, 0x65, 0x72, 0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x55, 0x73, 0x65, 0x46, - 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x22, 0x0a, 0x0c, - 0x50, 0x69, 0x70, 0x65, 
0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x18, 0x1a, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x0c, 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, - 0x12, 0x24, 0x0a, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, 0x54, 0x79, 0x70, - 0x65, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, - 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x43, 0x55, 0x44, 0x41, 0x18, 0x1c, - 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x43, 0x55, 0x44, 0x41, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x46, - 0x47, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x43, 0x46, - 0x47, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, - 0x47, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, - 0x12, 0x1c, 0x0a, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x1f, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x24, - 0x0a, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x18, - 0x20, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, - 0x6c, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, - 0x18, 0x21, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, - 0x12, 0x1e, 0x0a, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x18, 0x30, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, - 0x12, 0x1c, 0x0a, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x18, 0x22, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x1a, - 0x0a, 0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x18, 0x23, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4c, 0x6f, - 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0b, 0x4c, 0x6f, 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, - 0x4c, 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x2a, 0x20, 0x01, 0x28, 0x02, 0x52, - 0x09, 0x4c, 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x4e, 0x6f, - 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x18, 0x25, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x4e, - 0x6f, 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x12, 0x1e, 0x0a, 0x0a, 0x44, 0x72, 0x61, 0x66, - 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x27, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x44, 0x72, - 0x61, 0x66, 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x1c, 0x0a, 0x09, 0x41, 0x75, 0x64, 0x69, - 0x6f, 0x50, 0x61, 0x74, 0x68, 0x18, 0x26, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x41, 0x75, 0x64, - 0x69, 0x6f, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, - 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x51, 0x75, - 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x32, 0x0a, 0x14, 0x47, 0x50, - 0x55, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, - 0x6f, 0x6e, 0x18, 0x32, 0x20, 0x01, 0x28, 0x02, 0x52, 0x14, 0x47, 0x50, 0x55, 0x4d, 0x65, 0x6d, - 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x28, - 0x0a, 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 
0x65, 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, - 0x65, 0x18, 0x33, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, - 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x12, 0x22, 0x0a, 0x0c, 0x45, 0x6e, 0x66, 0x6f, - 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x18, 0x34, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, - 0x45, 0x6e, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, - 0x53, 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x18, 0x35, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x09, 0x53, 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x61, - 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x18, 0x36, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x0b, 0x4d, 0x61, 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x12, 0x16, 0x0a, 0x06, - 0x4d, 0x4d, 0x50, 0x72, 0x6f, 0x6a, 0x18, 0x29, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x4d, 0x4d, - 0x50, 0x72, 0x6f, 0x6a, 0x12, 0x20, 0x0a, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, 0x6c, - 0x69, 0x6e, 0x67, 0x18, 0x2b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, - 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x12, 0x24, 0x0a, 0x0d, 0x59, 0x61, 0x72, 0x6e, 0x45, 0x78, - 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2c, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x59, - 0x61, 0x72, 0x6e, 0x45, 0x78, 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x26, 0x0a, 0x0e, - 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2d, - 0x20, 0x01, 0x28, 0x02, 0x52, 0x0e, 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, - 0x63, 0x74, 0x6f, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, - 0x46, 0x61, 0x73, 0x74, 0x18, 0x2e, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e, - 0x42, 0x65, 0x74, 0x61, 0x46, 0x61, 0x73, 0x74, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, - 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x18, 0x2f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, - 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x12, 0x12, 0x0a, 0x04, - 0x54, 0x79, 0x70, 0x65, 0x18, 0x31, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x54, 0x79, 0x70, 0x65, - 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, - 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, - 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, - 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, - 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, - 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, - 0x73, 0x22, 0x5b, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67, - 0x75, 0x61, 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, - 0x75, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, - 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e, - 0x0a, 0x10, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 
0x52, 0x65, 0x73, 0x75, - 0x6c, 0x74, 0x12, 0x36, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, - 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, - 0x52, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, - 0x78, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x22, 0x77, - 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, - 0x65, 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x03, 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, - 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, - 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, - 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x05, 0x52, - 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xbe, 0x02, 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65, - 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x12, 0x16, 0x0a, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, - 0x52, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74, - 0x68, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x12, 0x12, - 0x0a, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f, - 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x74, 0x65, 0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, - 0x52, 0x04, 0x73, 0x74, 0x65, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x65, 0x65, 0x64, 0x18, 0x05, - 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x65, 0x65, 0x64, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f, - 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, - 0x6d, 0x70, 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, - 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x6e, 0x65, - 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03, - 0x64, 0x73, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x10, - 0x0a, 0x03, 0x73, 0x72, 0x63, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x72, 0x63, - 0x12, 0x2a, 0x0a, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, - 0x74, 0x65, 0x72, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x10, 0x45, 0x6e, 0x61, 0x62, - 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x12, 0x1a, 0x0a, 0x08, - 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, - 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x22, 0x5e, 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, - 0x64, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, - 
0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, - 0x73, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x22, 0x46, 0x0a, 0x14, 0x54, 0x6f, 0x6b, 0x65, - 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, - 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, - 0x52, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, - 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, - 0x22, 0xac, 0x01, 0x0a, 0x0f, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, - 0x44, 0x61, 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x45, 0x0a, 0x09, 0x62, 0x72, - 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, - 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x2e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, - 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, - 0x6e, 0x1a, 0x3c, 0x0a, 0x0e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e, - 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, - 0xbc, 0x01, 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, - 0x73, 0x65, 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x0e, 0x32, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, - 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x30, 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, - 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, - 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, - 0x61, 0x52, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x22, 0x43, 0x0a, 0x05, 0x53, 0x74, 0x61, - 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, - 0x5a, 0x45, 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x01, 0x12, - 0x09, 0x0a, 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, 0x10, 0x02, 0x12, 0x12, 0x0a, 0x05, 0x45, 0x52, - 0x52, 0x4f, 0x52, 0x10, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x22, 0x37, - 0x0a, 0x07, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x6f, 0x6c, - 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x72, 0x6f, 0x6c, 0x65, 0x12, 0x18, 0x0a, - 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, - 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x32, 0xfb, 0x06, 0x0a, 0x07, 0x42, 0x61, 0x63, 0x6b, - 0x65, 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x12, 0x16, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, - 0x73, 0x73, 0x61, 0x67, 
0x65, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, - 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, - 0x63, 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, - 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, - 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x35, 0x0a, - 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x15, 0x2e, 0x62, 0x61, 0x63, - 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, - 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, - 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, - 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, - 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, - 0x30, 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x12, - 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, - 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, - 0x6c, 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, - 0x49, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, - 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, - 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, 0x0a, 0x12, 0x41, 0x75, 0x64, 0x69, 0x6f, - 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, - 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, - 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, - 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x03, 0x54, 0x54, 0x53, 0x12, 0x13, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, - 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, - 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4a, 0x0a, 0x0e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, - 0x65, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, - 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x1a, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, - 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, - 0x00, 0x12, 0x3b, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x2e, 0x62, 0x61, - 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, - 0x61, 0x67, 0x65, 0x1a, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 
0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x39, - 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, 0x12, 0x19, 0x2e, 0x62, 0x61, - 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, 0x4f, - 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, - 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3f, 0x0a, 0x0c, 0x53, 0x74, 0x6f, - 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x12, 0x1c, 0x2e, 0x62, 0x61, 0x63, 0x6b, - 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, - 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, - 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x42, 0x0a, 0x09, 0x53, 0x74, - 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x12, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, - 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, - 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x45, - 0x0a, 0x0a, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x12, 0x1a, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, - 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x52, 0x65, 0x73, - 0x75, 0x6c, 0x74, 0x22, 0x00, 0x42, 0x5a, 0x0a, 0x19, 0x69, 0x6f, 0x2e, 0x73, 0x6b, 0x79, 0x6e, - 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x42, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, - 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, 0x61, 0x6c, - 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, -} - -var ( - file_backend_proto_rawDescOnce sync.Once - file_backend_proto_rawDescData = file_backend_proto_rawDesc -) - -func file_backend_proto_rawDescGZIP() []byte { - file_backend_proto_rawDescOnce.Do(func() { - file_backend_proto_rawDescData = protoimpl.X.CompressGZIP(file_backend_proto_rawDescData) - }) - return file_backend_proto_rawDescData -} - -var file_backend_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_backend_proto_msgTypes = make([]protoimpl.MessageInfo, 24) -var file_backend_proto_goTypes = []interface{}{ - (StatusResponse_State)(0), // 0: backend.StatusResponse.State - (*StoresKey)(nil), // 1: backend.StoresKey - (*StoresValue)(nil), // 2: backend.StoresValue - (*StoresSetOptions)(nil), // 3: backend.StoresSetOptions - (*StoresDeleteOptions)(nil), // 4: backend.StoresDeleteOptions - (*StoresGetOptions)(nil), // 5: backend.StoresGetOptions - (*StoresGetResult)(nil), // 6: backend.StoresGetResult - (*StoresFindOptions)(nil), // 7: backend.StoresFindOptions - (*StoresFindResult)(nil), // 8: backend.StoresFindResult - (*HealthMessage)(nil), // 9: backend.HealthMessage - (*PredictOptions)(nil), // 10: backend.PredictOptions - (*Reply)(nil), // 11: backend.Reply - (*ModelOptions)(nil), // 12: backend.ModelOptions - (*Result)(nil), // 13: 
backend.Result
-	(*EmbeddingResult)(nil),      // 14: backend.EmbeddingResult
-	(*TranscriptRequest)(nil),    // 15: backend.TranscriptRequest
-	(*TranscriptResult)(nil),     // 16: backend.TranscriptResult
-	(*TranscriptSegment)(nil),    // 17: backend.TranscriptSegment
-	(*GenerateImageRequest)(nil), // 18: backend.GenerateImageRequest
-	(*TTSRequest)(nil),           // 19: backend.TTSRequest
-	(*TokenizationResponse)(nil), // 20: backend.TokenizationResponse
-	(*MemoryUsageData)(nil),      // 21: backend.MemoryUsageData
-	(*StatusResponse)(nil),       // 22: backend.StatusResponse
-	(*Message)(nil),              // 23: backend.Message
-	nil,                          // 24: backend.MemoryUsageData.BreakdownEntry
-}
-var file_backend_proto_depIdxs = []int32{
-	1,  // 0: backend.StoresSetOptions.Keys:type_name -> backend.StoresKey
-	2,  // 1: backend.StoresSetOptions.Values:type_name -> backend.StoresValue
-	1,  // 2: backend.StoresDeleteOptions.Keys:type_name -> backend.StoresKey
-	1,  // 3: backend.StoresGetOptions.Keys:type_name -> backend.StoresKey
-	1,  // 4: backend.StoresGetResult.Keys:type_name -> backend.StoresKey
-	2,  // 5: backend.StoresGetResult.Values:type_name -> backend.StoresValue
-	1,  // 6: backend.StoresFindOptions.Key:type_name -> backend.StoresKey
-	1,  // 7: backend.StoresFindResult.Keys:type_name -> backend.StoresKey
-	2,  // 8: backend.StoresFindResult.Values:type_name -> backend.StoresValue
-	23, // 9: backend.PredictOptions.Messages:type_name -> backend.Message
-	17, // 10: backend.TranscriptResult.segments:type_name -> backend.TranscriptSegment
-	24, // 11: backend.MemoryUsageData.breakdown:type_name -> backend.MemoryUsageData.BreakdownEntry
-	0,  // 12: backend.StatusResponse.state:type_name -> backend.StatusResponse.State
-	21, // 13: backend.StatusResponse.memory:type_name -> backend.MemoryUsageData
-	9,  // 14: backend.Backend.Health:input_type -> backend.HealthMessage
-	10, // 15: backend.Backend.Predict:input_type -> backend.PredictOptions
-	12, // 16: backend.Backend.LoadModel:input_type -> backend.ModelOptions
-	10, // 17: backend.Backend.PredictStream:input_type -> backend.PredictOptions
-	10, // 18: backend.Backend.Embedding:input_type -> backend.PredictOptions
-	18, // 19: backend.Backend.GenerateImage:input_type -> backend.GenerateImageRequest
-	15, // 20: backend.Backend.AudioTranscription:input_type -> backend.TranscriptRequest
-	19, // 21: backend.Backend.TTS:input_type -> backend.TTSRequest
-	10, // 22: backend.Backend.TokenizeString:input_type -> backend.PredictOptions
-	9,  // 23: backend.Backend.Status:input_type -> backend.HealthMessage
-	3,  // 24: backend.Backend.StoresSet:input_type -> backend.StoresSetOptions
-	4,  // 25: backend.Backend.StoresDelete:input_type -> backend.StoresDeleteOptions
-	5,  // 26: backend.Backend.StoresGet:input_type -> backend.StoresGetOptions
-	7,  // 27: backend.Backend.StoresFind:input_type -> backend.StoresFindOptions
-	11, // 28: backend.Backend.Health:output_type -> backend.Reply
-	11, // 29: backend.Backend.Predict:output_type -> backend.Reply
-	13, // 30: backend.Backend.LoadModel:output_type -> backend.Result
-	11, // 31: backend.Backend.PredictStream:output_type -> backend.Reply
-	14, // 32: backend.Backend.Embedding:output_type -> backend.EmbeddingResult
-	13, // 33: backend.Backend.GenerateImage:output_type -> backend.Result
-	16, // 34: backend.Backend.AudioTranscription:output_type -> backend.TranscriptResult
-	13, // 35: backend.Backend.TTS:output_type -> backend.Result
-	20, // 36: backend.Backend.TokenizeString:output_type -> backend.TokenizationResponse
-	22, // 37: backend.Backend.Status:output_type -> backend.StatusResponse
-	13, // 38: backend.Backend.StoresSet:output_type -> backend.Result
-	13, // 39: backend.Backend.StoresDelete:output_type -> backend.Result
-	6,  // 40: backend.Backend.StoresGet:output_type -> backend.StoresGetResult
-	8,  // 41: backend.Backend.StoresFind:output_type -> backend.StoresFindResult
-	28, // [28:42] is the sub-list for method output_type
-	14, // [14:28] is the sub-list for method input_type
-	14, // [14:14] is the sub-list for extension type_name
-	14, // [14:14] is the sub-list for extension extendee
-	0,  // [0:14] is the sub-list for field type_name
-}
-
-func init() { file_backend_proto_init() }
-func file_backend_proto_init() {
-	if File_backend_proto != nil {
-		return
-	}
-	if !protoimpl.UnsafeEnabled {
-		file_backend_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*StoresKey); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*StoresValue); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*StoresSetOptions); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*StoresDeleteOptions); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*StoresGetOptions); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*StoresGetResult); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*StoresFindOptions); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*StoresFindResult); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*HealthMessage); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*PredictOptions); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[10].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*Reply); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[11].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*ModelOptions); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[12].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*Result); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[13].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*EmbeddingResult); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[14].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*TranscriptRequest); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[15].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*TranscriptResult); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[16].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*TranscriptSegment); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[17].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*GenerateImageRequest); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[18].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*TTSRequest); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[19].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*TokenizationResponse); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[20].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*MemoryUsageData); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[21].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*StatusResponse); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_backend_proto_msgTypes[22].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*Message); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-	}
-	type x struct{}
-	out := protoimpl.TypeBuilder{
-		File: protoimpl.DescBuilder{
-			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
-			RawDescriptor: file_backend_proto_rawDesc,
-			NumEnums:      1,
-			NumMessages:   24,
-			NumExtensions: 0,
-			NumServices:   1,
-		},
-		GoTypes:           file_backend_proto_goTypes,
-		DependencyIndexes: file_backend_proto_depIdxs,
-		EnumInfos:         file_backend_proto_enumTypes,
-		MessageInfos:      file_backend_proto_msgTypes,
-	}.Build()
-	File_backend_proto = out.File
-	file_backend_proto_rawDesc = nil
- file_backend_proto_goTypes = nil - file_backend_proto_depIdxs = nil -} diff --git a/pkg/grpc/proto/backend_grpc.pb.go b/pkg/grpc/proto/backend_grpc.pb.go deleted file mode 100644 index a1f442e0..00000000 --- a/pkg/grpc/proto/backend_grpc.pb.go +++ /dev/null @@ -1,618 +0,0 @@ -// Code generated by protoc-gen-go-grpc. DO NOT EDIT. -// versions: -// - protoc-gen-go-grpc v1.3.0 -// - protoc v5.26.1 -// source: backend.proto - -package proto - -import ( - context "context" - grpc "google.golang.org/grpc" - codes "google.golang.org/grpc/codes" - status "google.golang.org/grpc/status" -) - -// This is a compile-time assertion to ensure that this generated file -// is compatible with the grpc package it is being compiled against. -// Requires gRPC-Go v1.32.0 or later. -const _ = grpc.SupportPackageIsVersion7 - -const ( - Backend_Health_FullMethodName = "/backend.Backend/Health" - Backend_Predict_FullMethodName = "/backend.Backend/Predict" - Backend_LoadModel_FullMethodName = "/backend.Backend/LoadModel" - Backend_PredictStream_FullMethodName = "/backend.Backend/PredictStream" - Backend_Embedding_FullMethodName = "/backend.Backend/Embedding" - Backend_GenerateImage_FullMethodName = "/backend.Backend/GenerateImage" - Backend_AudioTranscription_FullMethodName = "/backend.Backend/AudioTranscription" - Backend_TTS_FullMethodName = "/backend.Backend/TTS" - Backend_TokenizeString_FullMethodName = "/backend.Backend/TokenizeString" - Backend_Status_FullMethodName = "/backend.Backend/Status" - Backend_StoresSet_FullMethodName = "/backend.Backend/StoresSet" - Backend_StoresDelete_FullMethodName = "/backend.Backend/StoresDelete" - Backend_StoresGet_FullMethodName = "/backend.Backend/StoresGet" - Backend_StoresFind_FullMethodName = "/backend.Backend/StoresFind" -) - -// BackendClient is the client API for Backend service. -// -// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. 
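Aside: the file deleted below is the protoc-generated gRPC client/server for backend.proto. For orientation, a minimal sketch of how a client generated like this is driven; the address and the insecure transport are illustrative assumptions, not code from this series:

    package main

    import (
        "context"
        "fmt"
        "time"

        "google.golang.org/grpc"
        "google.golang.org/grpc/credentials/insecure"

        pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    )

    func main() {
        // Dial a backend process; address and credentials are assumptions.
        conn, err := grpc.Dial("127.0.0.1:50051",
            grpc.WithTransportCredentials(insecure.NewCredentials()))
        if err != nil {
            panic(err)
        }
        defer conn.Close()

        // NewBackendClient and the Health RPC are declared in the generated
        // interface reproduced just below.
        client := pb.NewBackendClient(conn)
        ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
        defer cancel()

        reply, err := client.Health(ctx, &pb.HealthMessage{})
        if err != nil {
            panic(err)
        }
        fmt.Printf("health reply: %v\n", reply)
    }
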
-type BackendClient interface { - Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) - Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) - LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) - PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) - Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) - GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) - AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) - TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) - TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) - Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) - StoresSet(ctx context.Context, in *StoresSetOptions, opts ...grpc.CallOption) (*Result, error) - StoresDelete(ctx context.Context, in *StoresDeleteOptions, opts ...grpc.CallOption) (*Result, error) - StoresGet(ctx context.Context, in *StoresGetOptions, opts ...grpc.CallOption) (*StoresGetResult, error) - StoresFind(ctx context.Context, in *StoresFindOptions, opts ...grpc.CallOption) (*StoresFindResult, error) -} - -type backendClient struct { - cc grpc.ClientConnInterface -} - -func NewBackendClient(cc grpc.ClientConnInterface) BackendClient { - return &backendClient{cc} -} - -func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) { - out := new(Reply) - err := c.cc.Invoke(ctx, Backend_Health_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) { - out := new(Reply) - err := c.cc.Invoke(ctx, Backend_Predict_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) { - out := new(Result) - err := c.cc.Invoke(ctx, Backend_LoadModel_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) { - stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], Backend_PredictStream_FullMethodName, opts...) - if err != nil { - return nil, err - } - x := &backendPredictStreamClient{stream} - if err := x.ClientStream.SendMsg(in); err != nil { - return nil, err - } - if err := x.ClientStream.CloseSend(); err != nil { - return nil, err - } - return x, nil -} - -type Backend_PredictStreamClient interface { - Recv() (*Reply, error) - grpc.ClientStream -} - -type backendPredictStreamClient struct { - grpc.ClientStream -} - -func (x *backendPredictStreamClient) Recv() (*Reply, error) { - m := new(Reply) - if err := x.ClientStream.RecvMsg(m); err != nil { - return nil, err - } - return m, nil -} - -func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) { - out := new(EmbeddingResult) - err := c.cc.Invoke(ctx, Backend_Embedding_FullMethodName, in, out, opts...) 
- if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) { - out := new(Result) - err := c.cc.Invoke(ctx, Backend_GenerateImage_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) { - out := new(TranscriptResult) - err := c.cc.Invoke(ctx, Backend_AudioTranscription_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) { - out := new(Result) - err := c.cc.Invoke(ctx, Backend_TTS_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) { - out := new(TokenizationResponse) - err := c.cc.Invoke(ctx, Backend_TokenizeString_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) { - out := new(StatusResponse) - err := c.cc.Invoke(ctx, Backend_Status_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) StoresSet(ctx context.Context, in *StoresSetOptions, opts ...grpc.CallOption) (*Result, error) { - out := new(Result) - err := c.cc.Invoke(ctx, Backend_StoresSet_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) StoresDelete(ctx context.Context, in *StoresDeleteOptions, opts ...grpc.CallOption) (*Result, error) { - out := new(Result) - err := c.cc.Invoke(ctx, Backend_StoresDelete_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) StoresGet(ctx context.Context, in *StoresGetOptions, opts ...grpc.CallOption) (*StoresGetResult, error) { - out := new(StoresGetResult) - err := c.cc.Invoke(ctx, Backend_StoresGet_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) StoresFind(ctx context.Context, in *StoresFindOptions, opts ...grpc.CallOption) (*StoresFindResult, error) { - out := new(StoresFindResult) - err := c.cc.Invoke(ctx, Backend_StoresFind_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -// BackendServer is the server API for Backend service. 
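The server half of the generated file follows. A minimal conforming implementation (a sketch, not code from this patch) embeds the UnimplementedBackendServer defined below, so only the RPCs a backend actually supports need overriding:

    package main

    import (
        "context"
        "net"

        "google.golang.org/grpc"

        pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    )

    type myBackend struct {
        // Embedding provides "Unimplemented" stubs for every RPC, keeping
        // this type forward compatible as backend.proto grows.
        pb.UnimplementedBackendServer
    }

    func (b *myBackend) Health(ctx context.Context, in *pb.HealthMessage) (*pb.Reply, error) {
        return &pb.Reply{}, nil // fields omitted here; populate per backend.proto
    }

    func main() {
        lis, err := net.Listen("tcp", ":50051") // port is an assumption
        if err != nil {
            panic(err)
        }
        s := grpc.NewServer()
        pb.RegisterBackendServer(s, &myBackend{}) // wires in Backend_ServiceDesc
        _ = s.Serve(lis)
    }
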
-// All implementations must embed UnimplementedBackendServer -// for forward compatibility -type BackendServer interface { - Health(context.Context, *HealthMessage) (*Reply, error) - Predict(context.Context, *PredictOptions) (*Reply, error) - LoadModel(context.Context, *ModelOptions) (*Result, error) - PredictStream(*PredictOptions, Backend_PredictStreamServer) error - Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) - GenerateImage(context.Context, *GenerateImageRequest) (*Result, error) - AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error) - TTS(context.Context, *TTSRequest) (*Result, error) - TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error) - Status(context.Context, *HealthMessage) (*StatusResponse, error) - StoresSet(context.Context, *StoresSetOptions) (*Result, error) - StoresDelete(context.Context, *StoresDeleteOptions) (*Result, error) - StoresGet(context.Context, *StoresGetOptions) (*StoresGetResult, error) - StoresFind(context.Context, *StoresFindOptions) (*StoresFindResult, error) - mustEmbedUnimplementedBackendServer() -} - -// UnimplementedBackendServer must be embedded to have forward compatible implementations. -type UnimplementedBackendServer struct { -} - -func (UnimplementedBackendServer) Health(context.Context, *HealthMessage) (*Reply, error) { - return nil, status.Errorf(codes.Unimplemented, "method Health not implemented") -} -func (UnimplementedBackendServer) Predict(context.Context, *PredictOptions) (*Reply, error) { - return nil, status.Errorf(codes.Unimplemented, "method Predict not implemented") -} -func (UnimplementedBackendServer) LoadModel(context.Context, *ModelOptions) (*Result, error) { - return nil, status.Errorf(codes.Unimplemented, "method LoadModel not implemented") -} -func (UnimplementedBackendServer) PredictStream(*PredictOptions, Backend_PredictStreamServer) error { - return status.Errorf(codes.Unimplemented, "method PredictStream not implemented") -} -func (UnimplementedBackendServer) Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) { - return nil, status.Errorf(codes.Unimplemented, "method Embedding not implemented") -} -func (UnimplementedBackendServer) GenerateImage(context.Context, *GenerateImageRequest) (*Result, error) { - return nil, status.Errorf(codes.Unimplemented, "method GenerateImage not implemented") -} -func (UnimplementedBackendServer) AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error) { - return nil, status.Errorf(codes.Unimplemented, "method AudioTranscription not implemented") -} -func (UnimplementedBackendServer) TTS(context.Context, *TTSRequest) (*Result, error) { - return nil, status.Errorf(codes.Unimplemented, "method TTS not implemented") -} -func (UnimplementedBackendServer) TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error) { - return nil, status.Errorf(codes.Unimplemented, "method TokenizeString not implemented") -} -func (UnimplementedBackendServer) Status(context.Context, *HealthMessage) (*StatusResponse, error) { - return nil, status.Errorf(codes.Unimplemented, "method Status not implemented") -} -func (UnimplementedBackendServer) StoresSet(context.Context, *StoresSetOptions) (*Result, error) { - return nil, status.Errorf(codes.Unimplemented, "method StoresSet not implemented") -} -func (UnimplementedBackendServer) StoresDelete(context.Context, *StoresDeleteOptions) (*Result, error) { - return nil, status.Errorf(codes.Unimplemented, "method StoresDelete not 
implemented") -} -func (UnimplementedBackendServer) StoresGet(context.Context, *StoresGetOptions) (*StoresGetResult, error) { - return nil, status.Errorf(codes.Unimplemented, "method StoresGet not implemented") -} -func (UnimplementedBackendServer) StoresFind(context.Context, *StoresFindOptions) (*StoresFindResult, error) { - return nil, status.Errorf(codes.Unimplemented, "method StoresFind not implemented") -} -func (UnimplementedBackendServer) mustEmbedUnimplementedBackendServer() {} - -// UnsafeBackendServer may be embedded to opt out of forward compatibility for this service. -// Use of this interface is not recommended, as added methods to BackendServer will -// result in compilation errors. -type UnsafeBackendServer interface { - mustEmbedUnimplementedBackendServer() -} - -func RegisterBackendServer(s grpc.ServiceRegistrar, srv BackendServer) { - s.RegisterService(&Backend_ServiceDesc, srv) -} - -func _Backend_Health_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(HealthMessage) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).Health(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_Health_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).Health(ctx, req.(*HealthMessage)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_Predict_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(PredictOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).Predict(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_Predict_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).Predict(ctx, req.(*PredictOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(ModelOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).LoadModel(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_LoadModel_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).LoadModel(ctx, req.(*ModelOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_PredictStream_Handler(srv interface{}, stream grpc.ServerStream) error { - m := new(PredictOptions) - if err := stream.RecvMsg(m); err != nil { - return err - } - return srv.(BackendServer).PredictStream(m, &backendPredictStreamServer{stream}) -} - -type Backend_PredictStreamServer interface { - Send(*Reply) error - grpc.ServerStream -} - -type backendPredictStreamServer struct { - grpc.ServerStream -} - -func (x *backendPredictStreamServer) Send(m *Reply) error { - return x.ServerStream.SendMsg(m) -} - -func _Backend_Embedding_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(PredictOptions) - if err := dec(in); err != nil { - return nil, err - } - if 
interceptor == nil { - return srv.(BackendServer).Embedding(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_Embedding_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).Embedding(ctx, req.(*PredictOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_GenerateImage_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(GenerateImageRequest) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).GenerateImage(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_GenerateImage_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).GenerateImage(ctx, req.(*GenerateImageRequest)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_AudioTranscription_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(TranscriptRequest) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).AudioTranscription(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_AudioTranscription_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).AudioTranscription(ctx, req.(*TranscriptRequest)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_TTS_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(TTSRequest) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).TTS(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_TTS_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).TTS(ctx, req.(*TTSRequest)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_TokenizeString_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(PredictOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).TokenizeString(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_TokenizeString_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).TokenizeString(ctx, req.(*PredictOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_Status_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(HealthMessage) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).Status(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_Status_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).Status(ctx, req.(*HealthMessage)) - } - return interceptor(ctx, in, info, handler) -} - -func 
_Backend_StoresSet_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(StoresSetOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).StoresSet(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_StoresSet_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).StoresSet(ctx, req.(*StoresSetOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_StoresDelete_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(StoresDeleteOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).StoresDelete(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_StoresDelete_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).StoresDelete(ctx, req.(*StoresDeleteOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_StoresGet_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(StoresGetOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).StoresGet(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_StoresGet_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).StoresGet(ctx, req.(*StoresGetOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_StoresFind_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(StoresFindOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).StoresFind(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_StoresFind_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).StoresFind(ctx, req.(*StoresFindOptions)) - } - return interceptor(ctx, in, info, handler) -} - -// Backend_ServiceDesc is the grpc.ServiceDesc for Backend service. 
-// It's only intended for direct use with grpc.RegisterService, -// and not to be introspected or modified (even as a copy) -var Backend_ServiceDesc = grpc.ServiceDesc{ - ServiceName: "backend.Backend", - HandlerType: (*BackendServer)(nil), - Methods: []grpc.MethodDesc{ - { - MethodName: "Health", - Handler: _Backend_Health_Handler, - }, - { - MethodName: "Predict", - Handler: _Backend_Predict_Handler, - }, - { - MethodName: "LoadModel", - Handler: _Backend_LoadModel_Handler, - }, - { - MethodName: "Embedding", - Handler: _Backend_Embedding_Handler, - }, - { - MethodName: "GenerateImage", - Handler: _Backend_GenerateImage_Handler, - }, - { - MethodName: "AudioTranscription", - Handler: _Backend_AudioTranscription_Handler, - }, - { - MethodName: "TTS", - Handler: _Backend_TTS_Handler, - }, - { - MethodName: "TokenizeString", - Handler: _Backend_TokenizeString_Handler, - }, - { - MethodName: "Status", - Handler: _Backend_Status_Handler, - }, - { - MethodName: "StoresSet", - Handler: _Backend_StoresSet_Handler, - }, - { - MethodName: "StoresDelete", - Handler: _Backend_StoresDelete_Handler, - }, - { - MethodName: "StoresGet", - Handler: _Backend_StoresGet_Handler, - }, - { - MethodName: "StoresFind", - Handler: _Backend_StoresFind_Handler, - }, - }, - Streams: []grpc.StreamDesc{ - { - StreamName: "PredictStream", - Handler: _Backend_PredictStream_Handler, - ServerStreams: true, - }, - }, - Metadata: "backend.proto", -} From eed5706994a3e770a0194cad9d1cfd724ba1b10a Mon Sep 17 00:00:00 2001 From: Dave Date: Sat, 13 Apr 2024 03:45:34 -0400 Subject: [PATCH 0292/2895] refactor: backend/service split, channel-based llm flow (#1963) Refactor: channel based llm flow and services split --------- Signed-off-by: Dave Lee --- .github/workflows/test.yml | 15 +- Makefile | 18 +- backend/go/transcribe/transcript.go | 6 +- backend/go/transcribe/whisper.go | 2 +- core/backend/embeddings.go | 90 +- core/backend/image.go | 261 +++++- core/backend/llm.go | 271 ++++-- core/backend/options.go | 84 +- core/backend/transcript.go | 41 +- core/backend/tts.go | 77 +- core/cli/run.go | 8 +- core/cli/transcript.go | 19 +- core/cli/tts.go | 26 +- core/config/backend_config.go | 301 +------ core/config/backend_config_loader.go | 509 +++++++++++ core/config/exports_test.go | 6 + core/http/api.go | 209 +++-- core/http/api_test.go | 98 ++- core/http/ctx/fiber.go | 65 +- core/http/endpoints/elevenlabs/tts.go | 39 +- .../http/endpoints/localai/backend_monitor.go | 4 +- core/http/endpoints/localai/tts.go | 39 +- core/http/endpoints/openai/assistant.go | 2 +- core/http/endpoints/openai/chat.go | 621 ++------------ core/http/endpoints/openai/completion.go | 163 +--- core/http/endpoints/openai/edit.go | 78 +- core/http/endpoints/openai/embeddings.go | 65 +- core/http/endpoints/openai/image.go | 218 +---- core/http/endpoints/openai/inference.go | 55 -- core/http/endpoints/openai/list.go | 52 +- core/http/endpoints/openai/request.go | 285 ------- core/http/endpoints/openai/transcription.go | 28 +- core/schema/{whisper.go => transcription.go} | 2 +- core/services/backend_monitor.go | 30 +- core/services/gallery.go | 116 ++- core/services/list_models.go | 72 ++ .../services}/model_preload_test.go | 5 +- core/services/openai.go | 805 ++++++++++++++++++ core/startup/startup.go | 91 +- core/state.go | 41 + .../llm text/-completions Stream.bru | 25 + pkg/concurrency/concurrency.go | 135 +++ pkg/concurrency/concurrency_test.go | 101 +++ pkg/concurrency/types.go | 6 + pkg/grpc/backend.go | 2 +- pkg/grpc/base/base.go | 4 +- 
pkg/grpc/client.go | 4 +- pkg/grpc/embed.go | 4 +- pkg/grpc/interface.go | 2 +- pkg/model/initializers.go | 8 +- pkg/startup/model_preload.go | 85 -- pkg/utils/base64.go | 50 ++ 52 files changed, 3064 insertions(+), 2279 deletions(-) create mode 100644 core/config/backend_config_loader.go create mode 100644 core/config/exports_test.go delete mode 100644 core/http/endpoints/openai/inference.go delete mode 100644 core/http/endpoints/openai/request.go rename core/schema/{whisper.go => transcription.go} (90%) create mode 100644 core/services/list_models.go rename {pkg/startup => core/services}/model_preload_test.go (96%) create mode 100644 core/services/openai.go create mode 100644 core/state.go create mode 100644 examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru create mode 100644 pkg/concurrency/concurrency.go create mode 100644 pkg/concurrency/concurrency_test.go create mode 100644 pkg/concurrency/types.go delete mode 100644 pkg/startup/model_preload.go create mode 100644 pkg/utils/base64.go diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 46c4e065..29bd3e08 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -121,8 +121,9 @@ jobs: PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3 - timeout-minutes: 5 + uses: dave-gray101/action-tmate@master + with: + connect-timeout-seconds: 180 tests-aio-container: runs-on: ubuntu-latest @@ -173,8 +174,9 @@ jobs: make run-e2e-aio - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3 - timeout-minutes: 5 + uses: dave-gray101/action-tmate@master + with: + connect-timeout-seconds: 180 tests-apple: runs-on: macOS-14 @@ -207,5 +209,6 @@ jobs: BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3 - timeout-minutes: 5 \ No newline at end of file + uses: dave-gray101/action-tmate@master + with: + connect-timeout-seconds: 180 \ No newline at end of file diff --git a/Makefile b/Makefile index 5932dfb2..9f86ef23 100644 --- a/Makefile +++ b/Makefile @@ -301,6 +301,9 @@ clean-tests: rm -rf test-dir rm -rf core/http/backend-assets +halt-backends: ## Used to clean up stray backends sometimes left running when debugging manually + ps | grep 'backend-assets/grpc/' | awk '{print $$1}' | xargs -I {} kill -9 {} + ## Build: build: prepare backend-assets grpcs ## Build the project $(info ${GREEN}I local-ai build info:${RESET}) @@ -365,13 +368,13 @@ run-e2e-image: run-e2e-aio: @echo 'Running e2e AIO tests' - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio test-e2e: @echo 'Running e2e tests' BUILD_TYPE=$(BUILD_TYPE) \ LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e teardown-e2e: rm -rf $(TEST_DIR) || true @@ -379,15 +382,15 @@ teardown-e2e: test-gpt4all: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath 
./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r $(TEST_PATHS) + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) test-llama: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS) + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) test-llama-gguf: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r $(TEST_PATHS) + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) test-tts: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ @@ -636,7 +639,10 @@ backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libb $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/ - +# EXPERIMENTAL: +ifeq ($(BUILD_TYPE),metal) + cp $(CURDIR)/sources/go-llama-ggml/llama.cpp/ggml-metal.metal backend-assets/grpc/ +endif backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/ diff --git a/backend/go/transcribe/transcript.go b/backend/go/transcribe/transcript.go index fdfaa974..b38d5b9f 100644 --- a/backend/go/transcribe/transcript.go +++ b/backend/go/transcribe/transcript.go @@ -21,7 +21,7 @@ func runCommand(command []string) (string, error) { // AudioToWav converts audio to wav for transcribe. // TODO: use https://github.com/mccoyst/ogg? 
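The hunk header above references a runCommand helper whose body sits outside the diff context. A plausible minimal form, assuming it simply shells out and returns combined output (an inference, not the repository's actual code):

    package transcribe

    import "os/exec"

    // runCommand executes command[0] with the remaining arguments and returns
    // combined stdout/stderr, matching the (string, error) shape used below.
    func runCommand(command []string) (string, error) {
        cmd := exec.Command(command[0], command[1:]...)
        out, err := cmd.CombinedOutput()
        return string(out), err
    }
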
func audioToWav(src, dst string) error { - command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst} + command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst} out, err := runCommand(command) if err != nil { return fmt.Errorf("error: %w out: %s", err, out) @@ -29,8 +29,8 @@ func audioToWav(src, dst string) error { return nil } -func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.Result, error) { - res := schema.Result{} +func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.TranscriptionResult, error) { + res := schema.TranscriptionResult{} dir, err := os.MkdirTemp("", "whisper") if err != nil { diff --git a/backend/go/transcribe/whisper.go b/backend/go/transcribe/whisper.go index ac93be01..a9a62d24 100644 --- a/backend/go/transcribe/whisper.go +++ b/backend/go/transcribe/whisper.go @@ -21,6 +21,6 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error { return err } -func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.Result, error) { +func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) { return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads)) } diff --git a/core/backend/embeddings.go b/core/backend/embeddings.go index 03ff90b9..2c63dedc 100644 --- a/core/backend/embeddings.go +++ b/core/backend/embeddings.go @@ -2,14 +2,100 @@ package backend import ( "fmt" + "time" "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/google/uuid" + "github.com/go-skynet/LocalAI/pkg/concurrency" "github.com/go-skynet/LocalAI/pkg/grpc" - model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/model" ) -func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) { +type EmbeddingsBackendService struct { + ml *model.ModelLoader + bcl *config.BackendConfigLoader + appConfig *config.ApplicationConfig +} + +func NewEmbeddingsBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *EmbeddingsBackendService { + return &EmbeddingsBackendService{ + ml: ml, + bcl: bcl, + appConfig: appConfig, + } +} + +func (ebs *EmbeddingsBackendService) Embeddings(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.OpenAIResponse] { + + resultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) + go func(request *schema.OpenAIRequest) { + if request.Model == "" { + request.Model = model.StableDiffusionBackend + } + + bc, request, err := ebs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, ebs.appConfig) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + + items := []schema.Item{} + + for i, s := range bc.InputToken { + // get the model function to call for the result + embedFn, err := modelEmbedding("", s, ebs.ml, bc, ebs.appConfig) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + + embeddings, err := embedFn() + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + items = append(items, schema.Item{Embedding: embeddings, Index: i, 
Object: "embedding"}) + } + + for i, s := range bc.InputStrings { + // get the model function to call for the result + embedFn, err := modelEmbedding(s, []int{}, ebs.ml, bc, ebs.appConfig) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + + embeddings, err := embedFn() + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"}) + } + + id := uuid.New().String() + created := int(time.Now().Unix()) + resp := &schema.OpenAIResponse{ + ID: id, + Created: created, + Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. + Data: items, + Object: "list", + } + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: resp} + close(resultChannel) + }(request) + return resultChannel +} + +func modelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig *config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) { modelFile := backendConfig.Model grpcOpts := gRPCModelOpts(backendConfig) diff --git a/core/backend/image.go b/core/backend/image.go index b0cffb0b..affb3bb3 100644 --- a/core/backend/image.go +++ b/core/backend/image.go @@ -1,18 +1,252 @@ package backend import ( - "github.com/go-skynet/LocalAI/core/config" + "bufio" + "encoding/base64" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strconv" + "strings" + "time" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/google/uuid" + "github.com/rs/zerolog/log" + + "github.com/go-skynet/LocalAI/pkg/concurrency" "github.com/go-skynet/LocalAI/pkg/grpc/proto" - model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/model" ) -func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) { +type ImageGenerationBackendService struct { + ml *model.ModelLoader + bcl *config.BackendConfigLoader + appConfig *config.ApplicationConfig + BaseUrlForGeneratedImages string +} + +func NewImageGenerationBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *ImageGenerationBackendService { + return &ImageGenerationBackendService{ + ml: ml, + bcl: bcl, + appConfig: appConfig, + } +} + +func (igbs *ImageGenerationBackendService) GenerateImage(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.OpenAIResponse] { + resultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) + go func(request *schema.OpenAIRequest) { + bc, request, err := igbs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, igbs.appConfig) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + + src := "" + if request.File != "" { + + var fileData []byte + // check if input.File is an URL, if so download it and save it + // to a temporary file + if strings.HasPrefix(request.File, "http://") || strings.HasPrefix(request.File, "https://") { + out, err := downloadFile(request.File) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("failed downloading file:%w", err)} + close(resultChannel) + 
return + } + defer os.RemoveAll(out) + + fileData, err = os.ReadFile(out) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("failed reading file:%w", err)} + close(resultChannel) + return + } + + } else { + // base 64 decode the file and write it somewhere + // that we will cleanup + fileData, err = base64.StdEncoding.DecodeString(request.File) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + } + + // Create a temporary file + outputFile, err := os.CreateTemp(igbs.appConfig.ImageDir, "b64") + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + // write the base64 result + writer := bufio.NewWriter(outputFile) + _, err = writer.Write(fileData) + if err != nil { + outputFile.Close() + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + outputFile.Close() + src = outputFile.Name() + defer os.RemoveAll(src) + } + + log.Debug().Msgf("Parameter Config: %+v", bc) + + switch bc.Backend { + case "stablediffusion": + bc.Backend = model.StableDiffusionBackend + case "tinydream": + bc.Backend = model.TinyDreamBackend + case "": + bc.Backend = model.StableDiffusionBackend + if bc.Model == "" { + bc.Model = "stablediffusion_assets" // TODO: check? + } + } + + sizeParts := strings.Split(request.Size, "x") + if len(sizeParts) != 2 { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("invalid value for 'size'")} + close(resultChannel) + return + } + width, err := strconv.Atoi(sizeParts[0]) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("invalid value for 'size'")} + close(resultChannel) + return + } + height, err := strconv.Atoi(sizeParts[1]) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("invalid value for 'size'")} + close(resultChannel) + return + } + + b64JSON := false + if request.ResponseFormat.Type == "b64_json" { + b64JSON = true + } + // src and clip_skip + var result []schema.Item + for _, i := range bc.PromptStrings { + n := request.N + if request.N == 0 { + n = 1 + } + for j := 0; j < n; j++ { + prompts := strings.Split(i, "|") + positive_prompt := prompts[0] + negative_prompt := "" + if len(prompts) > 1 { + negative_prompt = prompts[1] + } + + mode := 0 + step := bc.Step + if step == 0 { + step = 15 + } + + if request.Mode != 0 { + mode = request.Mode + } + + if request.Step != 0 { + step = request.Step + } + + tempDir := "" + if !b64JSON { + tempDir = igbs.appConfig.ImageDir + } + // Create a temporary file + outputFile, err := os.CreateTemp(tempDir, "b64") + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + outputFile.Close() + output := outputFile.Name() + ".png" + // Rename the temporary file + err = os.Rename(outputFile.Name(), output) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + + if request.Seed == nil { + zVal := 0 // Idiomatic way to do this? Actually needed? 
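// NOTE (not part of this patch): one idiomatic answer to the TODO above,
// available since Go 1.18, is a tiny generic pointer helper:
//
//     func ptrTo[T any](v T) *T { return &v }
//
// which would collapse this branch to: request.Seed = ptrTo(0)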
+ request.Seed = &zVal + } + + fn, err := imageGeneration(height, width, mode, step, *request.Seed, positive_prompt, negative_prompt, src, output, igbs.ml, bc, igbs.appConfig) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + if err := fn(); err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + + item := &schema.Item{} + + if b64JSON { + defer os.RemoveAll(output) + data, err := os.ReadFile(output) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + item.B64JSON = base64.StdEncoding.EncodeToString(data) + } else { + base := filepath.Base(output) + item.URL = igbs.BaseUrlForGeneratedImages + base + } + + result = append(result, *item) + } + } + + id := uuid.New().String() + created := int(time.Now().Unix()) + resp := &schema.OpenAIResponse{ + ID: id, + Created: created, + Data: result, + } + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: resp} + close(resultChannel) + }(request) + return resultChannel +} + +func imageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig *config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) { + threads := backendConfig.Threads if *threads == 0 && appConfig.Threads != 0 { threads = &appConfig.Threads } + gRPCOpts := gRPCModelOpts(backendConfig) + opts := modelOpts(backendConfig, appConfig, []model.Option{ model.WithBackendString(backendConfig.Backend), model.WithAssetDir(appConfig.AssetsDestination), @@ -50,3 +284,24 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat return fn, nil } + +// TODO: Replace this function with pkg/downloader - no reason to have a (crappier) bespoke download file fn here, but get things working before that change. +func downloadFile(url string) (string, error) { + // Get the data + resp, err := http.Get(url) + if err != nil { + return "", err + } + defer resp.Body.Close() + + // Create the file + out, err := os.CreateTemp("", "image") + if err != nil { + return "", err + } + defer out.Close() + + // Write the body to file + _, err = io.Copy(out, resp.Body) + return out.Name(), err +} diff --git a/core/backend/llm.go b/core/backend/llm.go index 493dc25c..1878e87a 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -11,17 +11,22 @@ import ( "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" + "github.com/rs/zerolog/log" + "github.com/go-skynet/LocalAI/pkg/concurrency" "github.com/go-skynet/LocalAI/pkg/gallery" "github.com/go-skynet/LocalAI/pkg/grpc" "github.com/go-skynet/LocalAI/pkg/grpc/proto" - model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/utils" ) -type LLMResponse struct { - Response string // should this be []byte? - Usage TokenUsage +type LLMRequest struct { + Id int // TODO Remove if not used. + Text string + Images []string + RawMessages []schema.Message + // TODO: Other Modalities? 
} type TokenUsage struct { @@ -29,57 +34,94 @@ type TokenUsage struct { Completion int } -func ModelInference(ctx context.Context, s string, messages []schema.Message, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) { - modelFile := c.Model - threads := c.Threads - if *threads == 0 && o.Threads != 0 { - threads = &o.Threads +type LLMResponse struct { + Request *LLMRequest + Response string // should this be []byte? + Usage TokenUsage +} + +// TODO: Does this belong here or in core/services/openai.go? +type LLMResponseBundle struct { + Request *schema.OpenAIRequest + Response []schema.Choice + Usage TokenUsage +} + +type LLMBackendService struct { + bcl *config.BackendConfigLoader + ml *model.ModelLoader + appConfig *config.ApplicationConfig + ftMutex sync.Mutex + cutstrings map[string]*regexp.Regexp +} + +func NewLLMBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *LLMBackendService { + return &LLMBackendService{ + bcl: bcl, + ml: ml, + appConfig: appConfig, + ftMutex: sync.Mutex{}, + cutstrings: make(map[string]*regexp.Regexp), } - grpcOpts := gRPCModelOpts(c) +} + +// TODO: Should ctx param be removed and replaced with hardcoded req.Context? +func (llmbs *LLMBackendService) Inference(ctx context.Context, req *LLMRequest, bc *config.BackendConfig, enableTokenChannel bool) ( + resultChannel <-chan concurrency.ErrorOr[*LLMResponse], tokenChannel <-chan concurrency.ErrorOr[*LLMResponse], err error) { + + threads := bc.Threads + if (threads == nil || *threads == 0) && llmbs.appConfig.Threads != 0 { + threads = &llmbs.appConfig.Threads + } + + grpcOpts := gRPCModelOpts(bc) var inferenceModel grpc.Backend - var err error - opts := modelOpts(c, o, []model.Option{ + opts := modelOpts(bc, llmbs.appConfig, []model.Option{ model.WithLoadGRPCLoadModelOpts(grpcOpts), model.WithThreads(uint32(*threads)), // some models uses this to allocate threads during startup - model.WithAssetDir(o.AssetsDestination), - model.WithModel(modelFile), - model.WithContext(o.Context), + model.WithAssetDir(llmbs.appConfig.AssetsDestination), + model.WithModel(bc.Model), + model.WithContext(llmbs.appConfig.Context), }) - if c.Backend != "" { - opts = append(opts, model.WithBackendString(c.Backend)) + if bc.Backend != "" { + opts = append(opts, model.WithBackendString(bc.Backend)) } - // Check if the modelFile exists, if it doesn't try to load it from the gallery - if o.AutoloadGalleries { // experimental - if _, err := os.Stat(modelFile); os.IsNotExist(err) { + // Check if bc.Model exists, if it doesn't try to load it from the gallery + if llmbs.appConfig.AutoloadGalleries { // experimental + if _, err := os.Stat(bc.Model); os.IsNotExist(err) { utils.ResetDownloadTimers() // if we failed to load the model, we try to download it - err := gallery.InstallModelFromGalleryByName(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction) + err := gallery.InstallModelFromGalleryByName(llmbs.appConfig.Galleries, bc.Model, llmbs.appConfig.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction) if err != nil { - return nil, err + return nil, nil, err } } } - if c.Backend == "" { - inferenceModel, err = loader.GreedyLoader(opts...) 
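The pkg/concurrency package these hunks rely on (ErrorOr, SliceOfChannelsReducer) is created by this commit, but its diff is not reproduced in this excerpt. From the call sites throughout the commit, a rough reconstruction, with bodies inferred rather than copied:

    package concurrency

    import "sync"

    // ErrorOr carries either a value or an error through a channel;
    // consumers check Error before using Value, as the services here do.
    type ErrorOr[T any] struct {
        Value T
        Error error
    }

    // SliceOfChannelsReducer fans in every input channel, folds each item
    // into an accumulator, and emits the final accumulator on out once all
    // inputs are drained. Inferred from the GenerateText call site below;
    // the real implementation may differ.
    func SliceOfChannelsReducer[I any, O any](inputs []<-chan I, out chan<- O,
        reducer func(I, O) O, initial O, closeOut bool) *sync.WaitGroup {
        wg := &sync.WaitGroup{}
        wg.Add(len(inputs))
        acc := initial
        var mu sync.Mutex
        for _, ch := range inputs {
            go func(ch <-chan I) {
                defer wg.Done()
                for v := range ch {
                    mu.Lock()
                    acc = reducer(v, acc)
                    mu.Unlock()
                }
            }(ch)
        }
        go func() {
            wg.Wait() // all inputs drained before the final value is emitted
            out <- acc
            if closeOut {
                close(out)
            }
        }()
        return wg
    }

A caller then drains a service result as, for example, res := <-tbs.Transcribe(req), checking res.Error before touching res.Value.
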
+ if bc.Backend == "" { + log.Debug().Msgf("backend not known for %q, falling back to greedy loader to find it", bc.Model) + inferenceModel, err = llmbs.ml.GreedyLoader(opts...) } else { - inferenceModel, err = loader.BackendLoader(opts...) + inferenceModel, err = llmbs.ml.BackendLoader(opts...) } if err != nil { - return nil, err + log.Error().Err(err).Msg("[llmbs.Inference] failed to load a backend") + return } - var protoMessages []*proto.Message - // if we are using the tokenizer template, we need to convert the messages to proto messages - // unless the prompt has already been tokenized (non-chat endpoints + functions) - if c.TemplateConfig.UseTokenizerTemplate && s == "" { - protoMessages = make([]*proto.Message, len(messages), len(messages)) - for i, message := range messages { + grpcPredOpts := gRPCPredictOpts(bc, llmbs.appConfig.ModelPath) + grpcPredOpts.Prompt = req.Text + grpcPredOpts.Images = req.Images + + if bc.TemplateConfig.UseTokenizerTemplate && req.Text == "" { + grpcPredOpts.UseTokenizerTemplate = true + protoMessages := make([]*proto.Message, len(req.RawMessages), len(req.RawMessages)) + for i, message := range req.RawMessages { protoMessages[i] = &proto.Message{ Role: message.Role, } @@ -87,47 +129,32 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im case string: protoMessages[i].Content = ct default: - return nil, fmt.Errorf("Unsupported type for schema.Message.Content for inference: %T", ct) + err = fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct) + return } } } - // in GRPC, the backend is supposed to answer to 1 single token if stream is not supported - fn := func() (LLMResponse, error) { - opts := gRPCPredictOpts(c, loader.ModelPath) - opts.Prompt = s - opts.Messages = protoMessages - opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate - opts.Images = images + tokenUsage := TokenUsage{} - tokenUsage := TokenUsage{} + promptInfo, pErr := inferenceModel.TokenizeString(ctx, grpcPredOpts) + if pErr == nil && promptInfo.Length > 0 { + tokenUsage.Prompt = int(promptInfo.Length) + } - // check the per-model feature flag for usage, since tokenCallback may have a cost. - // Defaults to off as for now it is still experimental - if c.FeatureFlag.Enabled("usage") { - userTokenCallback := tokenCallback - if userTokenCallback == nil { - userTokenCallback = func(token string, usage TokenUsage) bool { - return true - } - } + rawResultChannel := make(chan concurrency.ErrorOr[*LLMResponse]) + // TODO this next line is the biggest argument for taking named return values _back_ out!!! + var rawTokenChannel chan concurrency.ErrorOr[*LLMResponse] - promptInfo, pErr := inferenceModel.TokenizeString(ctx, opts) - if pErr == nil && promptInfo.Length > 0 { - tokenUsage.Prompt = int(promptInfo.Length) - } + if enableTokenChannel { + rawTokenChannel = make(chan concurrency.ErrorOr[*LLMResponse]) - tokenCallback = func(token string, usage TokenUsage) bool { - tokenUsage.Completion++ - return userTokenCallback(token, tokenUsage) - } - } - - if tokenCallback != nil { - ss := "" + // TODO Needs better name + ss := "" + go func() { var partialRune []byte - err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) { + err := inferenceModel.PredictStream(ctx, grpcPredOpts, func(chars []byte) { partialRune = append(partialRune, chars...) 
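// NOTE (not part of this patch): partialRune accumulates raw bytes because
// a single multi-byte UTF-8 code point can arrive split across two
// PredictStream callbacks. The loop below, whose decode step falls in the
// unchanged context elided between these hunks, presumably decodes complete
// runes and carries any trailing partial bytes over to the next chunk.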
for len(partialRune) > 0 { @@ -137,48 +164,120 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im break } - tokenCallback(string(r), tokenUsage) + tokenUsage.Completion++ + rawTokenChannel <- concurrency.ErrorOr[*LLMResponse]{Value: &LLMResponse{ + Response: string(r), + Usage: tokenUsage, + }} + ss += string(r) partialRune = partialRune[size:] } }) - return LLMResponse{ - Response: ss, - Usage: tokenUsage, - }, err - } else { - // TODO: Is the chicken bit the only way to get here? is that acceptable? - reply, err := inferenceModel.Predict(ctx, opts) + close(rawTokenChannel) if err != nil { - return LLMResponse{}, err + rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Error: err} + } else { + rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Value: &LLMResponse{ + Response: ss, + Usage: tokenUsage, + }} } - return LLMResponse{ - Response: string(reply.Message), - Usage: tokenUsage, - }, err - } + close(rawResultChannel) + }() + } else { + go func() { + reply, err := inferenceModel.Predict(ctx, grpcPredOpts) + if err != nil { + rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Error: err} + close(rawResultChannel) + } else { + rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Value: &LLMResponse{ + Response: string(reply.Message), + Usage: tokenUsage, + }} + close(rawResultChannel) + } + }() } - return fn, nil + resultChannel = rawResultChannel + tokenChannel = rawTokenChannel + return } -var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp) -var mu sync.Mutex = sync.Mutex{} +// TODO: Should predInput be a seperate param still, or should this fn handle extracting it from request?? +func (llmbs *LLMBackendService) GenerateText(predInput string, request *schema.OpenAIRequest, bc *config.BackendConfig, + mappingFn func(*LLMResponse) schema.Choice, enableCompletionChannels bool, enableTokenChannels bool) ( + // Returns: + resultChannel <-chan concurrency.ErrorOr[*LLMResponseBundle], completionChannels []<-chan concurrency.ErrorOr[*LLMResponse], tokenChannels []<-chan concurrency.ErrorOr[*LLMResponse], err error) { -func Finetune(config config.BackendConfig, input, prediction string) string { + rawChannel := make(chan concurrency.ErrorOr[*LLMResponseBundle]) + resultChannel = rawChannel + + if request.N == 0 { // number of completions to return + request.N = 1 + } + images := []string{} + for _, m := range request.Messages { + images = append(images, m.StringImages...) + } + + for i := 0; i < request.N; i++ { + + individualResultChannel, tokenChannel, infErr := llmbs.Inference(request.Context, &LLMRequest{ + Text: predInput, + Images: images, + RawMessages: request.Messages, + }, bc, enableTokenChannels) + if infErr != nil { + err = infErr // Avoids complaints about redeclaring err but looks dumb + return + } + completionChannels = append(completionChannels, individualResultChannel) + tokenChannels = append(tokenChannels, tokenChannel) + } + + go func() { + initialBundle := LLMResponseBundle{ + Request: request, + Response: []schema.Choice{}, + Usage: TokenUsage{}, + } + + wg := concurrency.SliceOfChannelsReducer(completionChannels, rawChannel, func(iv concurrency.ErrorOr[*LLMResponse], ov concurrency.ErrorOr[*LLMResponseBundle]) concurrency.ErrorOr[*LLMResponseBundle] { + if iv.Error != nil { + ov.Error = iv.Error + // TODO: Decide if we should wipe partials or not? 
+ return ov + } + ov.Value.Usage.Prompt += iv.Value.Usage.Prompt + ov.Value.Usage.Completion += iv.Value.Usage.Completion + + ov.Value.Response = append(ov.Value.Response, mappingFn(iv.Value)) + return ov + }, concurrency.ErrorOr[*LLMResponseBundle]{Value: &initialBundle}, true) + wg.Wait() + + }() + + return +} + +func (llmbs *LLMBackendService) Finetune(config config.BackendConfig, input, prediction string) string { if config.Echo { prediction = input + prediction } for _, c := range config.Cutstrings { - mu.Lock() - reg, ok := cutstrings[c] + llmbs.ftMutex.Lock() + reg, ok := llmbs.cutstrings[c] if !ok { - cutstrings[c] = regexp.MustCompile(c) - reg = cutstrings[c] + llmbs.cutstrings[c] = regexp.MustCompile(c) + reg = llmbs.cutstrings[c] } - mu.Unlock() + llmbs.ftMutex.Unlock() prediction = reg.ReplaceAllString(prediction, "") } diff --git a/core/backend/options.go b/core/backend/options.go index 5b303b05..0b4e56db 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -10,7 +10,7 @@ import ( model "github.com/go-skynet/LocalAI/pkg/model" ) -func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option { +func modelOpts(bc *config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option { if so.SingleBackend { opts = append(opts, model.WithSingleActiveBackend()) } @@ -19,12 +19,12 @@ func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []mode opts = append(opts, model.EnableParallelRequests) } - if c.GRPC.Attempts != 0 { - opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts)) + if bc.GRPC.Attempts != 0 { + opts = append(opts, model.WithGRPCAttempts(bc.GRPC.Attempts)) } - if c.GRPC.AttemptsSleepTime != 0 { - opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime)) + if bc.GRPC.AttemptsSleepTime != 0 { + opts = append(opts, model.WithGRPCAttemptsDelay(bc.GRPC.AttemptsSleepTime)) } for k, v := range so.ExternalGRPCBackends { @@ -34,7 +34,7 @@ func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []mode return opts } -func getSeed(c config.BackendConfig) int32 { +func getSeed(c *config.BackendConfig) int32 { seed := int32(*c.Seed) if seed == config.RAND_SEED { seed = rand.Int31() @@ -43,7 +43,7 @@ func getSeed(c config.BackendConfig) int32 { return seed } -func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { +func gRPCModelOpts(c *config.BackendConfig) *pb.ModelOptions { b := 512 if c.Batch != 0 { b = c.Batch @@ -104,47 +104,47 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { } } -func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOptions { +func gRPCPredictOpts(bc *config.BackendConfig, modelPath string) *pb.PredictOptions { promptCachePath := "" - if c.PromptCachePath != "" { - p := filepath.Join(modelPath, c.PromptCachePath) + if bc.PromptCachePath != "" { + p := filepath.Join(modelPath, bc.PromptCachePath) os.MkdirAll(filepath.Dir(p), 0755) promptCachePath = p } return &pb.PredictOptions{ - Temperature: float32(*c.Temperature), - TopP: float32(*c.TopP), - NDraft: c.NDraft, - TopK: int32(*c.TopK), - Tokens: int32(*c.Maxtokens), - Threads: int32(*c.Threads), - PromptCacheAll: c.PromptCacheAll, - PromptCacheRO: c.PromptCacheRO, + Temperature: float32(*bc.Temperature), + TopP: float32(*bc.TopP), + NDraft: bc.NDraft, + TopK: int32(*bc.TopK), + Tokens: int32(*bc.Maxtokens), + Threads: int32(*bc.Threads), + PromptCacheAll: bc.PromptCacheAll, + PromptCacheRO: bc.PromptCacheRO, PromptCachePath: 
promptCachePath, - F16KV: *c.F16, - DebugMode: *c.Debug, - Grammar: c.Grammar, - NegativePromptScale: c.NegativePromptScale, - RopeFreqBase: c.RopeFreqBase, - RopeFreqScale: c.RopeFreqScale, - NegativePrompt: c.NegativePrompt, - Mirostat: int32(*c.LLMConfig.Mirostat), - MirostatETA: float32(*c.LLMConfig.MirostatETA), - MirostatTAU: float32(*c.LLMConfig.MirostatTAU), - Debug: *c.Debug, - StopPrompts: c.StopWords, - Repeat: int32(c.RepeatPenalty), - NKeep: int32(c.Keep), - Batch: int32(c.Batch), - IgnoreEOS: c.IgnoreEOS, - Seed: getSeed(c), - FrequencyPenalty: float32(c.FrequencyPenalty), - MLock: *c.MMlock, - MMap: *c.MMap, - MainGPU: c.MainGPU, - TensorSplit: c.TensorSplit, - TailFreeSamplingZ: float32(*c.TFZ), - TypicalP: float32(*c.TypicalP), + F16KV: *bc.F16, + DebugMode: *bc.Debug, + Grammar: bc.Grammar, + NegativePromptScale: bc.NegativePromptScale, + RopeFreqBase: bc.RopeFreqBase, + RopeFreqScale: bc.RopeFreqScale, + NegativePrompt: bc.NegativePrompt, + Mirostat: int32(*bc.LLMConfig.Mirostat), + MirostatETA: float32(*bc.LLMConfig.MirostatETA), + MirostatTAU: float32(*bc.LLMConfig.MirostatTAU), + Debug: *bc.Debug, + StopPrompts: bc.StopWords, + Repeat: int32(bc.RepeatPenalty), + NKeep: int32(bc.Keep), + Batch: int32(bc.Batch), + IgnoreEOS: bc.IgnoreEOS, + Seed: getSeed(bc), + FrequencyPenalty: float32(bc.FrequencyPenalty), + MLock: *bc.MMlock, + MMap: *bc.MMap, + MainGPU: bc.MainGPU, + TensorSplit: bc.TensorSplit, + TailFreeSamplingZ: float32(*bc.TFZ), + TypicalP: float32(*bc.TypicalP), } } diff --git a/core/backend/transcript.go b/core/backend/transcript.go index 4c3859df..6761c2ac 100644 --- a/core/backend/transcript.go +++ b/core/backend/transcript.go @@ -7,11 +7,48 @@ import ( "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/concurrency" "github.com/go-skynet/LocalAI/pkg/grpc/proto" - model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/model" ) -func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.Result, error) { +type TranscriptionBackendService struct { + ml *model.ModelLoader + bcl *config.BackendConfigLoader + appConfig *config.ApplicationConfig +} + +func NewTranscriptionBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *TranscriptionBackendService { + return &TranscriptionBackendService{ + ml: ml, + bcl: bcl, + appConfig: appConfig, + } +} + +func (tbs *TranscriptionBackendService) Transcribe(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.TranscriptionResult] { + responseChannel := make(chan concurrency.ErrorOr[*schema.TranscriptionResult]) + go func(request *schema.OpenAIRequest) { + bc, request, err := tbs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, tbs.appConfig) + if err != nil { + responseChannel <- concurrency.ErrorOr[*schema.TranscriptionResult]{Error: fmt.Errorf("failed reading parameters from request:%w", err)} + close(responseChannel) + return + } + + tr, err := modelTranscription(request.File, request.Language, tbs.ml, bc, tbs.appConfig) + if err != nil { + responseChannel <- concurrency.ErrorOr[*schema.TranscriptionResult]{Error: err} + close(responseChannel) + return + } + responseChannel <- concurrency.ErrorOr[*schema.TranscriptionResult]{Value: tr} + close(responseChannel) + }(request) + return responseChannel +} + +func modelTranscription(audio, language 
string, ml *model.ModelLoader, backendConfig *config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) { opts := modelOpts(backendConfig, appConfig, []model.Option{ model.WithBackendString(model.WhisperBackend), diff --git a/core/backend/tts.go b/core/backend/tts.go index f97b6202..d1fa270d 100644 --- a/core/backend/tts.go +++ b/core/backend/tts.go @@ -7,29 +7,60 @@ import ( "path/filepath" "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/concurrency" "github.com/go-skynet/LocalAI/pkg/grpc/proto" - model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/utils" ) -func generateUniqueFileName(dir, baseName, ext string) string { - counter := 1 - fileName := baseName + ext +type TextToSpeechBackendService struct { + ml *model.ModelLoader + bcl *config.BackendConfigLoader + appConfig *config.ApplicationConfig +} - for { - filePath := filepath.Join(dir, fileName) - _, err := os.Stat(filePath) - if os.IsNotExist(err) { - return fileName - } - - counter++ - fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext) +func NewTextToSpeechBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *TextToSpeechBackendService { + return &TextToSpeechBackendService{ + ml: ml, + bcl: bcl, + appConfig: appConfig, } } -func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (string, *proto.Result, error) { +func (ttsbs *TextToSpeechBackendService) TextToAudioFile(request *schema.TTSRequest) <-chan concurrency.ErrorOr[*string] { + responseChannel := make(chan concurrency.ErrorOr[*string]) + go func(request *schema.TTSRequest) { + cfg, err := ttsbs.bcl.LoadBackendConfigFileByName(request.Model, ttsbs.appConfig.ModelPath, + config.LoadOptionDebug(ttsbs.appConfig.Debug), + config.LoadOptionThreads(ttsbs.appConfig.Threads), + config.LoadOptionContextSize(ttsbs.appConfig.ContextSize), + config.LoadOptionF16(ttsbs.appConfig.F16), + ) + if err != nil { + responseChannel <- concurrency.ErrorOr[*string]{Error: err} + close(responseChannel) + return + } + + if request.Backend != "" { + cfg.Backend = request.Backend + } + + outFile, _, err := modelTTS(cfg.Backend, request.Input, cfg.Model, request.Voice, ttsbs.ml, ttsbs.appConfig, cfg) + if err != nil { + responseChannel <- concurrency.ErrorOr[*string]{Error: err} + close(responseChannel) + return + } + responseChannel <- concurrency.ErrorOr[*string]{Value: &outFile} + close(responseChannel) + }(request) + return responseChannel +} + +func modelTTS(backend, text, modelFile string, voice string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig *config.BackendConfig) (string, *proto.Result, error) { bb := backend if bb == "" { bb = model.PiperBackend @@ -37,7 +68,7 @@ func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader, grpcOpts := gRPCModelOpts(backendConfig) - opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{ + opts := modelOpts(&config.BackendConfig{}, appConfig, []model.Option{ model.WithBackendString(bb), model.WithModel(modelFile), model.WithContext(appConfig.Context), @@ -87,3 +118,19 @@ func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader, return filePath, res, err } + +func generateUniqueFileName(dir, baseName, ext string) string { + 
counter := 1 + fileName := baseName + ext + + for { + filePath := filepath.Join(dir, fileName) + _, err := os.Stat(filePath) + if os.IsNotExist(err) { + return fileName + } + + counter++ + fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext) + } +} diff --git a/core/cli/run.go b/core/cli/run.go index 09d09979..c3b186c0 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -124,11 +124,11 @@ func (r *RunCMD) Run(ctx *Context) error { } if r.PreloadBackendOnly { - _, _, _, err := startup.Startup(opts...) + _, err := startup.Startup(opts...) return err } - cl, ml, options, err := startup.Startup(opts...) + application, err := startup.Startup(opts...) if err != nil { return fmt.Errorf("failed basic startup tasks with error %s", err.Error()) @@ -137,7 +137,7 @@ func (r *RunCMD) Run(ctx *Context) error { // Watch the configuration directory // If the directory does not exist, we don't watch it if _, err := os.Stat(r.LocalaiConfigDir); err == nil { - closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, options) + closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, application.ApplicationConfig) defer closeConfigWatcherFn() if err != nil { @@ -145,7 +145,7 @@ func (r *RunCMD) Run(ctx *Context) error { } } - appHTTP, err := http.App(cl, ml, options) + appHTTP, err := http.App(application) if err != nil { log.Error().Err(err).Msg("error during HTTP App construction") return err diff --git a/core/cli/transcript.go b/core/cli/transcript.go index 9f36a77c..f14a1a87 100644 --- a/core/cli/transcript.go +++ b/core/cli/transcript.go @@ -7,6 +7,7 @@ import ( "github.com/go-skynet/LocalAI/core/backend" "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/model" ) @@ -43,11 +44,21 @@ func (t *TranscriptCMD) Run(ctx *Context) error { defer ml.StopAllGRPC() - tr, err := backend.ModelTranscription(t.Filename, t.Language, ml, c, opts) - if err != nil { - return err + tbs := backend.NewTranscriptionBackendService(ml, cl, opts) + + resultChannel := tbs.Transcribe(&schema.OpenAIRequest{ + PredictionOptions: schema.PredictionOptions{ + Language: t.Language, + }, + File: t.Filename, + }) + + r := <-resultChannel + + if r.Error != nil { + return r.Error } - for _, segment := range tr.Segments { + for _, segment := range r.Value.Segments { fmt.Println(segment.Start.String(), "-", segment.Text) } return nil diff --git a/core/cli/tts.go b/core/cli/tts.go index 1d8fd3a3..c7758c48 100644 --- a/core/cli/tts.go +++ b/core/cli/tts.go @@ -9,6 +9,7 @@ import ( "github.com/go-skynet/LocalAI/core/backend" "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/model" ) @@ -42,20 +43,29 @@ func (t *TTSCMD) Run(ctx *Context) error { defer ml.StopAllGRPC() - options := config.BackendConfig{} - options.SetDefaults() + ttsbs := backend.NewTextToSpeechBackendService(ml, config.NewBackendConfigLoader(), opts) - filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, ml, opts, options) - if err != nil { - return err + request := &schema.TTSRequest{ + Model: t.Model, + Input: text, + Backend: t.Backend, + Voice: t.Voice, + } + + resultsChannel := ttsbs.TextToAudioFile(request) + + rawResult := <-resultsChannel + + if rawResult.Error != nil { + return rawResult.Error } if outputFile != "" { - if err := os.Rename(filePath, outputFile); err != nil { + if err := os.Rename(*rawResult.Value, outputFile); err != nil { return err } - 
fmt.Printf("Generate file %s\n", outputFile) + fmt.Printf("Generated file %q\n", outputFile) } else { - fmt.Printf("Generate file %s\n", filePath) + fmt.Printf("Generated file %q\n", *rawResult.Value) } return nil } diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 81c92d01..47e4829d 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -1,22 +1,7 @@ package config import ( - "errors" - "fmt" - "io/fs" - "os" - "path/filepath" - "sort" - "strings" - "sync" - "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/downloader" - "github.com/go-skynet/LocalAI/pkg/utils" - "github.com/rs/zerolog/log" - "gopkg.in/yaml.v3" - - "github.com/charmbracelet/glamour" ) const ( @@ -199,7 +184,7 @@ func (c *BackendConfig) FunctionToCall() string { } func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { - lo := &LoadOptions{} + lo := &ConfigLoaderOptions{} lo.Apply(opts...) ctx := lo.ctxSize @@ -312,287 +297,3 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { cfg.Debug = &trueV } } - -////// Config Loader //////// - -type BackendConfigLoader struct { - configs map[string]BackendConfig - sync.Mutex -} - -type LoadOptions struct { - debug bool - threads, ctxSize int - f16 bool -} - -func LoadOptionDebug(debug bool) ConfigLoaderOption { - return func(o *LoadOptions) { - o.debug = debug - } -} - -func LoadOptionThreads(threads int) ConfigLoaderOption { - return func(o *LoadOptions) { - o.threads = threads - } -} - -func LoadOptionContextSize(ctxSize int) ConfigLoaderOption { - return func(o *LoadOptions) { - o.ctxSize = ctxSize - } -} - -func LoadOptionF16(f16 bool) ConfigLoaderOption { - return func(o *LoadOptions) { - o.f16 = f16 - } -} - -type ConfigLoaderOption func(*LoadOptions) - -func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) { - for _, l := range options { - l(lo) - } -} - -// Load a config file for a model -func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) { - - // Load a config file if present after the model name - cfg := &BackendConfig{ - PredictionOptions: schema.PredictionOptions{ - Model: modelName, - }, - } - - cfgExisting, exists := cl.GetBackendConfig(modelName) - if exists { - cfg = &cfgExisting - } else { - // Try loading a model config file - modelConfig := filepath.Join(modelPath, modelName+".yaml") - if _, err := os.Stat(modelConfig); err == nil { - if err := cl.LoadBackendConfig( - modelConfig, opts..., - ); err != nil { - return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error()) - } - cfgExisting, exists = cl.GetBackendConfig(modelName) - if exists { - cfg = &cfgExisting - } - } - } - - cfg.SetDefaults(opts...) - - return cfg, nil -} - -func NewBackendConfigLoader() *BackendConfigLoader { - return &BackendConfigLoader{ - configs: make(map[string]BackendConfig), - } -} -func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) { - c := &[]*BackendConfig{} - f, err := os.ReadFile(file) - if err != nil { - return nil, fmt.Errorf("cannot read config file: %w", err) - } - if err := yaml.Unmarshal(f, c); err != nil { - return nil, fmt.Errorf("cannot unmarshal config file: %w", err) - } - - for _, cc := range *c { - cc.SetDefaults(opts...) - } - - return *c, nil -} - -func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) { - lo := &LoadOptions{} - lo.Apply(opts...) 
- - c := &BackendConfig{} - f, err := os.ReadFile(file) - if err != nil { - return nil, fmt.Errorf("cannot read config file: %w", err) - } - if err := yaml.Unmarshal(f, c); err != nil { - return nil, fmt.Errorf("cannot unmarshal config file: %w", err) - } - - c.SetDefaults(opts...) - return c, nil -} - -func (cm *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error { - cm.Lock() - defer cm.Unlock() - c, err := ReadBackendConfigFile(file, opts...) - if err != nil { - return fmt.Errorf("cannot load config file: %w", err) - } - - for _, cc := range c { - cm.configs[cc.Name] = *cc - } - return nil -} - -func (cl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error { - cl.Lock() - defer cl.Unlock() - c, err := ReadBackendConfig(file, opts...) - if err != nil { - return fmt.Errorf("cannot read config file: %w", err) - } - - cl.configs[c.Name] = *c - return nil -} - -func (cl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) { - cl.Lock() - defer cl.Unlock() - v, exists := cl.configs[m] - return v, exists -} - -func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig { - cl.Lock() - defer cl.Unlock() - var res []BackendConfig - for _, v := range cl.configs { - res = append(res, v) - } - - sort.SliceStable(res, func(i, j int) bool { - return res[i].Name < res[j].Name - }) - - return res -} - -func (cl *BackendConfigLoader) ListBackendConfigs() []string { - cl.Lock() - defer cl.Unlock() - var res []string - for k := range cl.configs { - res = append(res, k) - } - return res -} - -// Preload prepare models if they are not local but url or huggingface repositories -func (cl *BackendConfigLoader) Preload(modelPath string) error { - cl.Lock() - defer cl.Unlock() - - status := func(fileName, current, total string, percent float64) { - utils.DisplayDownloadFunction(fileName, current, total, percent) - } - - log.Info().Msgf("Preloading models from %s", modelPath) - - renderMode := "dark" - if os.Getenv("COLOR") != "" { - renderMode = os.Getenv("COLOR") - } - - glamText := func(t string) { - out, err := glamour.Render(t, renderMode) - if err == nil && os.Getenv("NO_COLOR") == "" { - fmt.Println(out) - } else { - fmt.Println(t) - } - } - - for i, config := range cl.configs { - - // Download files and verify their SHA - for _, file := range config.DownloadFiles { - log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename) - - if err := utils.VerifyPath(file.Filename, modelPath); err != nil { - return err - } - // Create file path - filePath := filepath.Join(modelPath, file.Filename) - - if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil { - return err - } - } - - modelURL := config.PredictionOptions.Model - modelURL = downloader.ConvertURL(modelURL) - - if downloader.LooksLikeURL(modelURL) { - // md5 of model name - md5Name := utils.MD5(modelURL) - - // check if file exists - if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) { - err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status) - if err != nil { - return err - } - } - - cc := cl.configs[i] - c := &cc - c.PredictionOptions.Model = md5Name - cl.configs[i] = *c - } - if cl.configs[i].Name != "" { - glamText(fmt.Sprintf("**Model name**: _%s_", cl.configs[i].Name)) - } - if cl.configs[i].Description != "" { - //glamText("**Description**") - glamText(cl.configs[i].Description) - } - if cl.configs[i].Usage != "" { - //glamText("**Usage**") 
- glamText(cl.configs[i].Usage) - } - } - return nil -} - -// LoadBackendConfigsFromPath reads all the configurations of the models from a path -// (non-recursive) -func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error { - cm.Lock() - defer cm.Unlock() - entries, err := os.ReadDir(path) - if err != nil { - return err - } - files := make([]fs.FileInfo, 0, len(entries)) - for _, entry := range entries { - info, err := entry.Info() - if err != nil { - return err - } - files = append(files, info) - } - for _, file := range files { - // Skip templates, YAML and .keep files - if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") { - continue - } - c, err := ReadBackendConfig(filepath.Join(path, file.Name()), opts...) - if err == nil { - cm.configs[c.Name] = *c - } - } - - return nil -} diff --git a/core/config/backend_config_loader.go b/core/config/backend_config_loader.go new file mode 100644 index 00000000..62dfc1e0 --- /dev/null +++ b/core/config/backend_config_loader.go @@ -0,0 +1,509 @@ +package config + +import ( + "encoding/json" + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + "sort" + "strings" + "sync" + + "github.com/charmbracelet/glamour" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/downloader" + "github.com/go-skynet/LocalAI/pkg/grammar" + "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/rs/zerolog/log" + "gopkg.in/yaml.v2" +) + +type BackendConfigLoader struct { + configs map[string]BackendConfig + sync.Mutex +} + +type ConfigLoaderOptions struct { + debug bool + threads, ctxSize int + f16 bool +} + +func LoadOptionDebug(debug bool) ConfigLoaderOption { + return func(o *ConfigLoaderOptions) { + o.debug = debug + } +} + +func LoadOptionThreads(threads int) ConfigLoaderOption { + return func(o *ConfigLoaderOptions) { + o.threads = threads + } +} + +func LoadOptionContextSize(ctxSize int) ConfigLoaderOption { + return func(o *ConfigLoaderOptions) { + o.ctxSize = ctxSize + } +} + +func LoadOptionF16(f16 bool) ConfigLoaderOption { + return func(o *ConfigLoaderOptions) { + o.f16 = f16 + } +} + +type ConfigLoaderOption func(*ConfigLoaderOptions) + +func (lo *ConfigLoaderOptions) Apply(options ...ConfigLoaderOption) { + for _, l := range options { + l(lo) + } +} + +func NewBackendConfigLoader() *BackendConfigLoader { + return &BackendConfigLoader{ + configs: make(map[string]BackendConfig), + } +} + +func (bcl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error { + bcl.Lock() + defer bcl.Unlock() + c, err := readBackendConfig(file, opts...) 
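The new loader file keeps the functional-options pattern: each LoadOption* constructor returns a closure that mutates a shared ConfigLoaderOptions value before a config file is parsed. A short, hypothetical call site showing how the options defined above compose (the model path and option values are illustrative only):

package main

import (
	"fmt"

	"github.com/go-skynet/LocalAI/core/config"
)

func main() {
	bcl := config.NewBackendConfigLoader()
	// Hypothetical path; the options mirror those defined in this patch.
	err := bcl.LoadBackendConfig("/models/llama.yaml",
		config.LoadOptionDebug(true),
		config.LoadOptionThreads(8),
		config.LoadOptionContextSize(4096),
		config.LoadOptionF16(true),
	)
	if err != nil {
		fmt.Println("load failed:", err)
		return
	}
	fmt.Println("known configs:", bcl.ListBackendConfigs())
}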
+ if err != nil { + return fmt.Errorf("cannot read config file: %w", err) + } + + bcl.configs[c.Name] = *c + return nil +} + +func (bcl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) { + bcl.Lock() + defer bcl.Unlock() + v, exists := bcl.configs[m] + return v, exists +} + +func (bcl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig { + bcl.Lock() + defer bcl.Unlock() + var res []BackendConfig + for _, v := range bcl.configs { + res = append(res, v) + } + sort.SliceStable(res, func(i, j int) bool { + return res[i].Name < res[j].Name + }) + return res +} + +func (bcl *BackendConfigLoader) ListBackendConfigs() []string { + bcl.Lock() + defer bcl.Unlock() + var res []string + for k := range bcl.configs { + res = append(res, k) + } + return res +} + +// Preload prepare models if they are not local but url or huggingface repositories +func (bcl *BackendConfigLoader) Preload(modelPath string) error { + bcl.Lock() + defer bcl.Unlock() + + status := func(fileName, current, total string, percent float64) { + utils.DisplayDownloadFunction(fileName, current, total, percent) + } + + log.Info().Msgf("Preloading models from %s", modelPath) + + renderMode := "dark" + if os.Getenv("COLOR") != "" { + renderMode = os.Getenv("COLOR") + } + + glamText := func(t string) { + out, err := glamour.Render(t, renderMode) + if err == nil && os.Getenv("NO_COLOR") == "" { + fmt.Println(out) + } else { + fmt.Println(t) + } + } + + for i, config := range bcl.configs { + + // Download files and verify their SHA + for _, file := range config.DownloadFiles { + log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename) + + if err := utils.VerifyPath(file.Filename, modelPath); err != nil { + return err + } + // Create file path + filePath := filepath.Join(modelPath, file.Filename) + + if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil { + return err + } + } + + modelURL := config.PredictionOptions.Model + modelURL = downloader.ConvertURL(modelURL) + + if downloader.LooksLikeURL(modelURL) { + // md5 of model name + md5Name := utils.MD5(modelURL) + + // check if file exists + if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) { + err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status) + if err != nil { + return err + } + } + + cc := bcl.configs[i] + c := &cc + c.PredictionOptions.Model = md5Name + bcl.configs[i] = *c + } + if bcl.configs[i].Name != "" { + glamText(fmt.Sprintf("**Model name**: _%s_", bcl.configs[i].Name)) + } + if bcl.configs[i].Description != "" { + //glamText("**Description**") + glamText(bcl.configs[i].Description) + } + if bcl.configs[i].Usage != "" { + //glamText("**Usage**") + glamText(bcl.configs[i].Usage) + } + } + return nil +} + +func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error { + bcl.Lock() + defer bcl.Unlock() + entries, err := os.ReadDir(path) + if err != nil { + return err + } + files := make([]fs.FileInfo, 0, len(entries)) + for _, entry := range entries { + info, err := entry.Info() + if err != nil { + return err + } + files = append(files, info) + } + for _, file := range files { + // Skip templates, YAML and .keep files + if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") { + continue + } + c, err := readBackendConfig(filepath.Join(path, file.Name()), opts...) 
+ if err == nil { + bcl.configs[c.Name] = *c + } + } + + return nil +} + +func (bcl *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error { + bcl.Lock() + defer bcl.Unlock() + c, err := readBackendConfigFile(file, opts...) + if err != nil { + return fmt.Errorf("cannot load config file: %w", err) + } + + for _, cc := range c { + bcl.configs[cc.Name] = *cc + } + return nil +} + +////////// + +// Load a config file for a model +func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName string, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) { + + // Load a config file if present after the model name + cfg := &BackendConfig{ + PredictionOptions: schema.PredictionOptions{ + Model: modelName, + }, + } + + cfgExisting, exists := bcl.GetBackendConfig(modelName) + if exists { + cfg = &cfgExisting + } else { + // Load a config file if present after the model name + modelConfig := filepath.Join(modelPath, modelName+".yaml") + if _, err := os.Stat(modelConfig); err == nil { + if err := bcl.LoadBackendConfig(modelConfig); err != nil { + return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error()) + } + cfgExisting, exists = bcl.GetBackendConfig(modelName) + if exists { + cfg = &cfgExisting + } + } + } + + cfg.SetDefaults(opts...) + return cfg, nil +} + +func readBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) { + c := &[]*BackendConfig{} + f, err := os.ReadFile(file) + if err != nil { + return nil, fmt.Errorf("cannot read config file: %w", err) + } + if err := yaml.Unmarshal(f, c); err != nil { + return nil, fmt.Errorf("cannot unmarshal config file: %w", err) + } + + for _, cc := range *c { + cc.SetDefaults(opts...) + } + + return *c, nil +} + +func readBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) { + c := &BackendConfig{} + f, err := os.ReadFile(file) + if err != nil { + return nil, fmt.Errorf("cannot read config file: %w", err) + } + if err := yaml.Unmarshal(f, c); err != nil { + return nil, fmt.Errorf("cannot unmarshal config file: %w", err) + } + + c.SetDefaults(opts...) 
+ return c, nil +} + +func (bcl *BackendConfigLoader) LoadBackendConfigForModelAndOpenAIRequest(modelFile string, input *schema.OpenAIRequest, appConfig *ApplicationConfig) (*BackendConfig, *schema.OpenAIRequest, error) { + cfg, err := bcl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, + LoadOptionContextSize(appConfig.ContextSize), + LoadOptionDebug(appConfig.Debug), + LoadOptionF16(appConfig.F16), + LoadOptionThreads(appConfig.Threads), + ) + + // Set the parameters for the language model prediction + updateBackendConfigFromOpenAIRequest(cfg, input) + + return cfg, input, err +} + +func updateBackendConfigFromOpenAIRequest(bc *BackendConfig, request *schema.OpenAIRequest) { + if request.Echo { + bc.Echo = request.Echo + } + if request.TopK != nil && *request.TopK != 0 { + bc.TopK = request.TopK + } + if request.TopP != nil && *request.TopP != 0 { + bc.TopP = request.TopP + } + + if request.Backend != "" { + bc.Backend = request.Backend + } + + if request.ClipSkip != 0 { + bc.Diffusers.ClipSkip = request.ClipSkip + } + + if request.ModelBaseName != "" { + bc.AutoGPTQ.ModelBaseName = request.ModelBaseName + } + + if request.NegativePromptScale != 0 { + bc.NegativePromptScale = request.NegativePromptScale + } + + if request.UseFastTokenizer { + bc.UseFastTokenizer = request.UseFastTokenizer + } + + if request.NegativePrompt != "" { + bc.NegativePrompt = request.NegativePrompt + } + + if request.RopeFreqBase != 0 { + bc.RopeFreqBase = request.RopeFreqBase + } + + if request.RopeFreqScale != 0 { + bc.RopeFreqScale = request.RopeFreqScale + } + + if request.Grammar != "" { + bc.Grammar = request.Grammar + } + + if request.Temperature != nil && *request.Temperature != 0 { + bc.Temperature = request.Temperature + } + + if request.Maxtokens != nil && *request.Maxtokens != 0 { + bc.Maxtokens = request.Maxtokens + } + + switch stop := request.Stop.(type) { + case string: + if stop != "" { + bc.StopWords = append(bc.StopWords, stop) + } + case []interface{}: + for _, pp := range stop { + if s, ok := pp.(string); ok { + bc.StopWords = append(bc.StopWords, s) + } + } + } + + if len(request.Tools) > 0 { + for _, tool := range request.Tools { + request.Functions = append(request.Functions, tool.Function) + } + } + + if request.ToolsChoice != nil { + var toolChoice grammar.Tool + switch content := request.ToolsChoice.(type) { + case string: + _ = json.Unmarshal([]byte(content), &toolChoice) + case map[string]interface{}: + dat, _ := json.Marshal(content) + _ = json.Unmarshal(dat, &toolChoice) + } + request.FunctionCall = map[string]interface{}{ + "name": toolChoice.Function.Name, + } + } + + // Decode each request's message content + index := 0 + for i, m := range request.Messages { + switch content := m.Content.(type) { + case string: + request.Messages[i].StringContent = content + case []interface{}: + dat, _ := json.Marshal(content) + c := []schema.Content{} + json.Unmarshal(dat, &c) + for _, pp := range c { + if pp.Type == "text" { + request.Messages[i].StringContent = pp.Text + } else if pp.Type == "image_url" { + // Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64: + base64, err := utils.GetImageURLAsBase64(pp.ImageURL.URL) + if err == nil { + request.Messages[i].StringImages = append(request.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff + // set a placeholder for each image + request.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + request.Messages[i].StringContent + index++ + } else { + 
fmt.Print("Failed encoding image", err) + } + } + } + } + } + + if request.RepeatPenalty != 0 { + bc.RepeatPenalty = request.RepeatPenalty + } + + if request.FrequencyPenalty != 0 { + bc.FrequencyPenalty = request.FrequencyPenalty + } + + if request.PresencePenalty != 0 { + bc.PresencePenalty = request.PresencePenalty + } + + if request.Keep != 0 { + bc.Keep = request.Keep + } + + if request.Batch != 0 { + bc.Batch = request.Batch + } + + if request.IgnoreEOS { + bc.IgnoreEOS = request.IgnoreEOS + } + + if request.Seed != nil { + bc.Seed = request.Seed + } + + if request.TypicalP != nil { + bc.TypicalP = request.TypicalP + } + + switch inputs := request.Input.(type) { + case string: + if inputs != "" { + bc.InputStrings = append(bc.InputStrings, inputs) + } + case []interface{}: + for _, pp := range inputs { + switch i := pp.(type) { + case string: + bc.InputStrings = append(bc.InputStrings, i) + case []interface{}: + tokens := []int{} + for _, ii := range i { + tokens = append(tokens, int(ii.(float64))) + } + bc.InputToken = append(bc.InputToken, tokens) + } + } + } + + // Can be either a string or an object + switch fnc := request.FunctionCall.(type) { + case string: + if fnc != "" { + bc.SetFunctionCallString(fnc) + } + case map[string]interface{}: + var name string + n, exists := fnc["name"] + if exists { + nn, e := n.(string) + if e { + name = nn + } + } + bc.SetFunctionCallNameString(name) + } + + switch p := request.Prompt.(type) { + case string: + bc.PromptStrings = append(bc.PromptStrings, p) + case []interface{}: + for _, pp := range p { + if s, ok := pp.(string); ok { + bc.PromptStrings = append(bc.PromptStrings, s) + } + } + } +} diff --git a/core/config/exports_test.go b/core/config/exports_test.go new file mode 100644 index 00000000..70ba84e6 --- /dev/null +++ b/core/config/exports_test.go @@ -0,0 +1,6 @@ +package config + +// This file re-exports private functions to be used directly in unit tests. +// Since this file's name ends in _test.go, theoretically these should not be exposed past the tests. 
+ +var ReadBackendConfigFile = readBackendConfigFile diff --git a/core/http/api.go b/core/http/api.go index af38512a..5c9095ea 100644 --- a/core/http/api.go +++ b/core/http/api.go @@ -1,23 +1,20 @@ package http import ( - "encoding/json" "errors" - "os" "strings" - "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/go-skynet/LocalAI/core" + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" "github.com/gofiber/swagger" // swagger handler "github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs" "github.com/go-skynet/LocalAI/core/http/endpoints/localai" "github.com/go-skynet/LocalAI/core/http/endpoints/openai" - - "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/core/services" "github.com/go-skynet/LocalAI/internal" - "github.com/go-skynet/LocalAI/pkg/model" + model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/cors" @@ -55,13 +52,12 @@ func readAuthHeader(c *fiber.Ctx) string { // @securityDefinitions.apikey BearerAuth // @in header // @name Authorization - -func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) { +func App(application *core.Application) (*fiber.App, error) { // Return errors as JSON responses app := fiber.New(fiber.Config{ Views: renderEngine(), - BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB - DisableStartupMessage: appConfig.DisableMessage, + BodyLimit: application.ApplicationConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB + DisableStartupMessage: application.ApplicationConfig.DisableMessage, // Override default error handler ErrorHandler: func(ctx *fiber.Ctx, err error) error { // Status code defaults to 500 @@ -82,7 +78,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi }, }) - if appConfig.Debug { + if application.ApplicationConfig.Debug { app.Use(logger.New(logger.Config{ Format: "[${ip}]:${port} ${status} - ${method} ${path}\n", })) @@ -90,7 +86,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi // Default middleware config - if !appConfig.Debug { + if !application.ApplicationConfig.Debug { app.Use(recover.New()) } @@ -108,27 +104,27 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi // Auth middleware checking if API key is valid. If no API key is set, no auth is required. auth := func(c *fiber.Ctx) error { - if len(appConfig.ApiKeys) == 0 { + if len(application.ApplicationConfig.ApiKeys) == 0 { return c.Next() } - // Check for api_keys.json file - fileContent, err := os.ReadFile("api_keys.json") - if err == nil { - // Parse JSON content from the file - var fileKeys []string - err := json.Unmarshal(fileContent, &fileKeys) - if err != nil { - return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"}) - } + // // Check for api_keys.json file + // fileContent, err := os.ReadFile("api_keys.json") + // if err == nil { + // // Parse JSON content from the file + // var fileKeys []string + // err := json.Unmarshal(fileContent, &fileKeys) + // if err != nil { + // return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"}) + // } - // Add file keys to options.ApiKeys - appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...) 
- } + // // Add file keys to options.ApiKeys + // application.ApplicationConfig.ApiKeys = append(application.ApplicationConfig.ApiKeys, fileKeys...) + // } - if len(appConfig.ApiKeys) == 0 { - return c.Next() - } + // if len(application.ApplicationConfig.ApiKeys) == 0 { + // return c.Next() + // } authHeader := readAuthHeader(c) if authHeader == "" { @@ -142,7 +138,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi } apiKey := authHeaderParts[1] - for _, key := range appConfig.ApiKeys { + for _, key := range application.ApplicationConfig.ApiKeys { if apiKey == key { return c.Next() } @@ -151,20 +147,22 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"}) } - if appConfig.CORS { + if application.ApplicationConfig.CORS { var c func(ctx *fiber.Ctx) error - if appConfig.CORSAllowOrigins == "" { + if application.ApplicationConfig.CORSAllowOrigins == "" { c = cors.New() } else { - c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins}) + c = cors.New(cors.Config{AllowOrigins: application.ApplicationConfig.CORSAllowOrigins}) } app.Use(c) } + fiberContextExtractor := fiberContext.NewFiberContextExtractor(application.ModelLoader, application.ApplicationConfig) + // LocalAI API endpoints - galleryService := services.NewGalleryService(appConfig.ModelPath) - galleryService.Start(appConfig.Context, cl) + galleryService := services.NewGalleryService(application.ApplicationConfig.ModelPath) + galleryService.Start(application.ApplicationConfig.Context, application.BackendConfigLoader) app.Get("/version", auth, func(c *fiber.Ctx) error { return c.JSON(struct { @@ -172,29 +170,17 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi }{Version: internal.PrintableVersion()}) }) - // Make sure directories exists - os.MkdirAll(appConfig.ImageDir, 0755) - os.MkdirAll(appConfig.AudioDir, 0755) - os.MkdirAll(appConfig.UploadDir, 0755) - os.MkdirAll(appConfig.ConfigsDir, 0755) - os.MkdirAll(appConfig.ModelPath, 0755) - - // Load config jsons - utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles) - utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) - utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles) - app.Get("/swagger/*", swagger.HandlerDefault) // default welcomeRoute( app, - cl, - ml, - appConfig, + application.BackendConfigLoader, + application.ModelLoader, + application.ApplicationConfig, auth, ) - modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) + modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(application.ApplicationConfig.Galleries, application.ApplicationConfig.ModelPath, galleryService) app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint()) app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint()) app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint()) @@ -203,83 +189,85 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint()) app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint()) - app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig)) - - 
// Elevenlabs - app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig)) - // Stores - sl := model.NewModelLoader("") - app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig)) - app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig)) - app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig)) - app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig)) + storeLoader := model.NewModelLoader("") // TODO: Investigate if this should be migrated to application and reused. Should the path be configurable? Merging for now. + app.Post("/stores/set", auth, localai.StoresSetEndpoint(storeLoader, application.ApplicationConfig)) + app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(storeLoader, application.ApplicationConfig)) + app.Post("/stores/get", auth, localai.StoresGetEndpoint(storeLoader, application.ApplicationConfig)) + app.Post("/stores/find", auth, localai.StoresFindEndpoint(storeLoader, application.ApplicationConfig)) - // openAI compatible API endpoint + // openAI compatible API endpoints // chat - app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig)) - app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig)) + app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(fiberContextExtractor, application.OpenAIService)) + app.Post("/chat/completions", auth, openai.ChatEndpoint(fiberContextExtractor, application.OpenAIService)) // edit - app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) - app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) + app.Post("/v1/edits", auth, openai.EditEndpoint(fiberContextExtractor, application.OpenAIService)) + app.Post("/edits", auth, openai.EditEndpoint(fiberContextExtractor, application.OpenAIService)) // assistant - app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig)) - app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig)) - app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig)) - app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig)) - app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig)) - app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) - app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) - app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) - app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, 
appConfig)) - app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig)) + // TODO: Refactor this to the new style eventually + app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Get("/assistants", auth, openai.ListAssistantsEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Post("/assistants", auth, openai.CreateAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) // files - app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) - app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) - app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig)) - app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig)) - 
app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig)) - app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig)) - app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig)) - app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig)) - app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig)) - app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig)) + app.Post("/v1/files", auth, openai.UploadFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) + app.Post("/files", auth, openai.UploadFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) + app.Get("/v1/files", auth, openai.ListFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) + app.Get("/files", auth, openai.ListFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) + app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) + app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) + app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) + app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) + app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) + app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) // completion - app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) - app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) - app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) + app.Post("/v1/completions", auth, openai.CompletionEndpoint(fiberContextExtractor, application.OpenAIService)) + app.Post("/completions", auth, openai.CompletionEndpoint(fiberContextExtractor, application.OpenAIService)) + app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(fiberContextExtractor, application.OpenAIService)) // embeddings - app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) - app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) - app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) + app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(fiberContextExtractor, application.EmbeddingsBackendService)) + app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(fiberContextExtractor, application.EmbeddingsBackendService)) + app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(fiberContextExtractor, application.EmbeddingsBackendService)) // audio - app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig)) - app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig)) + app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(fiberContextExtractor, application.TranscriptionBackendService)) + app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(fiberContextExtractor, application.TextToSpeechBackendService)) // images - app.Post("/v1/images/generations", auth, 
openai.ImageEndpoint(cl, ml, appConfig)) + app.Post("/v1/images/generations", auth, openai.ImageEndpoint(fiberContextExtractor, application.ImageGenerationBackendService)) - if appConfig.ImageDir != "" { - app.Static("/generated-images", appConfig.ImageDir) + // Elevenlabs + app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(fiberContextExtractor, application.TextToSpeechBackendService)) + + // LocalAI TTS? + app.Post("/tts", auth, localai.TTSEndpoint(fiberContextExtractor, application.TextToSpeechBackendService)) + + if application.ApplicationConfig.ImageDir != "" { + app.Static("/generated-images", application.ApplicationConfig.ImageDir) } - if appConfig.AudioDir != "" { - app.Static("/generated-audio", appConfig.AudioDir) + if application.ApplicationConfig.AudioDir != "" { + app.Static("/generated-audio", application.ApplicationConfig.AudioDir) } ok := func(c *fiber.Ctx) error { @@ -291,13 +279,12 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi app.Get("/readyz", ok) // Experimental Backend Statistics Module - backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now - app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor)) - app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor)) + app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(application.BackendMonitorService)) + app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(application.BackendMonitorService)) // models - app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml)) - app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml)) + app.Get("/v1/models", auth, openai.ListModelsEndpoint(application.ListModelsService)) + app.Get("/models", auth, openai.ListModelsEndpoint(application.ListModelsService)) app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint()) diff --git a/core/http/api_test.go b/core/http/api_test.go index 1553ed21..bf8feb1c 100644 --- a/core/http/api_test.go +++ b/core/http/api_test.go @@ -12,7 +12,9 @@ import ( "os" "path/filepath" "runtime" + "strings" + "github.com/go-skynet/LocalAI/core" "github.com/go-skynet/LocalAI/core/config" . "github.com/go-skynet/LocalAI/core/http" "github.com/go-skynet/LocalAI/core/schema" @@ -205,9 +207,7 @@ var _ = Describe("API test", func() { var cancel context.CancelFunc var tmpdir string var modelDir string - var bcl *config.BackendConfigLoader - var ml *model.ModelLoader - var applicationConfig *config.ApplicationConfig + var application *core.Application commonOpts := []config.AppOption{ config.WithDebug(true), @@ -252,7 +252,7 @@ var _ = Describe("API test", func() { }, } - bcl, ml, applicationConfig, err = startup.Startup( + application, err = startup.Startup( append(commonOpts, config.WithContext(c), config.WithGalleries(galleries), @@ -261,7 +261,7 @@ var _ = Describe("API test", func() { config.WithBackendAssetsOutput(backendAssetsDir))...) 
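The api.go hunks above thread everything through a single *core.Application value instead of separate loader arguments. A sketch of that aggregate, reconstructed only from the field accesses visible in this patch; the package paths of the service types, and any additional fields assembled by startup.Startup, are assumptions:

package core

import (
	"github.com/go-skynet/LocalAI/core/backend"
	"github.com/go-skynet/LocalAI/core/config"
	"github.com/go-skynet/LocalAI/core/services"
	"github.com/go-skynet/LocalAI/pkg/model"
)

// Application bundles the loaders and services that http.App consumes.
// Field names come from the accesses above; the types are best guesses.
type Application struct {
	ApplicationConfig             *config.ApplicationConfig
	BackendConfigLoader           *config.BackendConfigLoader
	ModelLoader                   *model.ModelLoader
	OpenAIService                 *services.OpenAIService
	EmbeddingsBackendService      *backend.EmbeddingsBackendService
	ImageGenerationBackendService *backend.ImageGenerationBackendService
	TranscriptionBackendService   *backend.TranscriptionBackendService
	TextToSpeechBackendService    *backend.TextToSpeechBackendService
	BackendMonitorService         *services.BackendMonitorService
	ListModelsService             *services.ListModelsService
}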
Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = App(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -474,11 +474,11 @@ var _ = Describe("API test", func() { }) Expect(err).ToNot(HaveOccurred()) Expect(len(resp2.Choices)).To(Equal(1)) - Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil()) - Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name) + Expect(resp2.Choices[0].Message.ToolCalls[0].Function).ToNot(BeNil()) + Expect(resp2.Choices[0].Message.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.ToolCalls[0].Function.Name) var res map[string]string - err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res) + err = json.Unmarshal([]byte(resp2.Choices[0].Message.ToolCalls[0].Function.Arguments), &res) Expect(err).ToNot(HaveOccurred()) Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res)) Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) @@ -487,9 +487,9 @@ var _ = Describe("API test", func() { }) It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } + // if runtime.GOOS != "linux" { + // Skip("test supported only on linux") + // } modelName := "codellama" response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ URL: "github:go-skynet/model-gallery/codellama-7b-instruct.yaml", @@ -504,7 +504,7 @@ var _ = Describe("API test", func() { Eventually(func() bool { response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) return response["processed"].(bool) - }, "360s", "10s").Should(Equal(true)) + }, "480s", "10s").Should(Equal(true)) By("testing chat") resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: modelName, Messages: []openai.ChatCompletionMessage{ @@ -551,11 +551,13 @@ var _ = Describe("API test", func() { }) Expect(err).ToNot(HaveOccurred()) Expect(len(resp2.Choices)).To(Equal(1)) - Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil()) - Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name) + fmt.Printf("\n--- %+v\n\n", resp2.Choices[0].Message) + Expect(resp2.Choices[0].Message.ToolCalls).ToNot(BeNil()) + Expect(resp2.Choices[0].Message.ToolCalls[0]).ToNot(BeNil()) + Expect(resp2.Choices[0].Message.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.ToolCalls[0].Function.Name) var res map[string]string - err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res) + err = json.Unmarshal([]byte(resp2.Choices[0].Message.ToolCalls[0].Function.Arguments), &res) Expect(err).ToNot(HaveOccurred()) Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res)) Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) @@ -609,7 +611,7 @@ var _ = Describe("API test", func() { }, } - bcl, ml, applicationConfig, err = startup.Startup( + application, err = startup.Startup( append(commonOpts, config.WithContext(c), config.WithAudioDir(tmpdir), @@ -620,7 +622,7 @@ var _ = Describe("API test", func() { config.WithBackendAssetsOutput(tmpdir))..., ) Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = App(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -724,14 +726,14 @@ var _ = 
Describe("API test", func() { var err error - bcl, ml, applicationConfig, err = startup.Startup( + application, err = startup.Startup( append(commonOpts, config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")), config.WithContext(c), config.WithModelPath(modelPath), )...) Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = App(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -761,6 +763,11 @@ var _ = Describe("API test", func() { Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8? }) It("can generate completions via ggml", func() { + bt, ok := os.LookupEnv("BUILD_TYPE") + if ok && strings.ToLower(bt) == "metal" { + Skip("GGML + Metal is known flaky, skip test temporarily") + } + resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) @@ -768,6 +775,11 @@ var _ = Describe("API test", func() { }) It("can generate chat completions via ggml", func() { + bt, ok := os.LookupEnv("BUILD_TYPE") + if ok && strings.ToLower(bt) == "metal" { + Skip("GGML + Metal is known flaky, skip test temporarily") + } + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel.ggml", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) @@ -775,6 +787,11 @@ var _ = Describe("API test", func() { }) It("can generate completions from model configs", func() { + bt, ok := os.LookupEnv("BUILD_TYPE") + if ok && strings.ToLower(bt) == "metal" { + Skip("GGML + Metal is known flaky, skip test temporarily") + } + resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: testPrompt}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) @@ -782,6 +799,11 @@ var _ = Describe("API test", func() { }) It("can generate chat completions from model configs", func() { + bt, ok := os.LookupEnv("BUILD_TYPE") + if ok && strings.ToLower(bt) == "metal" { + Skip("GGML + Metal is known flaky, skip test temporarily") + } + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) @@ -868,9 +890,9 @@ var _ = Describe("API test", func() { Context("backends", func() { It("runs rwkv completion", func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } + // if runtime.GOOS != "linux" { + // Skip("test supported only on linux") + // } resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,"}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices) > 0).To(BeTrue()) @@ -891,17 +913,20 @@ var _ = Describe("API test", func() { } Expect(err).ToNot(HaveOccurred()) - text += response.Choices[0].Text - tokens++ + + if len(response.Choices) > 0 { + text += response.Choices[0].Text + tokens++ + } } Expect(text).ToNot(BeEmpty()) Expect(text).To(ContainSubstring("five")) Expect(tokens).ToNot(Or(Equal(1), Equal(0))) }) It("runs rwkv chat completion", func() { - if runtime.GOOS != "linux" { - Skip("test supported 
only on linux") - } + // if runtime.GOOS != "linux" { + // Skip("test supported only on linux") + // } resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}}) Expect(err).ToNot(HaveOccurred()) @@ -1010,14 +1035,14 @@ var _ = Describe("API test", func() { c, cancel = context.WithCancel(context.Background()) var err error - bcl, ml, applicationConfig, err = startup.Startup( + application, err = startup.Startup( append(commonOpts, config.WithContext(c), config.WithModelPath(modelPath), config.WithConfigFile(os.Getenv("CONFIG_FILE")))..., ) Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = App(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -1041,18 +1066,33 @@ var _ = Describe("API test", func() { } }) It("can generate chat completions from config file (list1)", func() { + bt, ok := os.LookupEnv("BUILD_TYPE") + if ok && strings.ToLower(bt) == "metal" { + Skip("GGML + Metal is known flaky, skip test temporarily") + } + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) }) It("can generate chat completions from config file (list2)", func() { + bt, ok := os.LookupEnv("BUILD_TYPE") + if ok && strings.ToLower(bt) == "metal" { + Skip("GGML + Metal is known flaky, skip test temporarily") + } + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) }) It("can generate edit completions from config file", func() { + bt, ok := os.LookupEnv("BUILD_TYPE") + if ok && strings.ToLower(bt) == "metal" { + Skip("GGML + Metal is known flaky, skip test temporarily") + } + request := openaigo.EditCreateRequestBody{ Model: "list2", Instruction: "foo", diff --git a/core/http/ctx/fiber.go b/core/http/ctx/fiber.go index ffb63111..99fbcde9 100644 --- a/core/http/ctx/fiber.go +++ b/core/http/ctx/fiber.go @@ -1,43 +1,88 @@ package fiberContext import ( + "context" + "encoding/json" "fmt" "strings" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" ) +type FiberContextExtractor struct { + ml *model.ModelLoader + appConfig *config.ApplicationConfig +} + +func NewFiberContextExtractor(ml *model.ModelLoader, appConfig *config.ApplicationConfig) *FiberContextExtractor { + return &FiberContextExtractor{ + ml: ml, + appConfig: appConfig, + } +} + // ModelFromContext returns the model from the context // If no model is specified, it will take the first available // Takes a model string as input which should be the one received from the user request. // It returns the model name resolved from the context and an error if any. 
-func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) { - if ctx.Params("model") != "" { - modelInput = ctx.Params("model") +func (fce *FiberContextExtractor) ModelFromContext(ctx *fiber.Ctx, modelInput string, firstModel bool) (string, error) { + ctxPM := ctx.Params("model") + if ctxPM != "" { + log.Debug().Msgf("[FCE] Overriding param modelInput %q with ctx.Params value %q", modelInput, ctxPM) + modelInput = ctxPM } // Set model from bearer token, if available - bearer := strings.TrimLeft(ctx.Get("authorization"), "Bearer ") - bearerExists := bearer != "" && loader.ExistsInModelPath(bearer) + bearer := strings.TrimPrefix(ctx.Get("authorization"), "Bearer ") + bearerExists := bearer != "" && fce.ml.ExistsInModelPath(bearer) // If no model was specified, take the first available if modelInput == "" && !bearerExists && firstModel { - models, _ := loader.ListModels() + models, _ := fce.ml.ListModels() if len(models) > 0 { modelInput = models[0] - log.Debug().Msgf("No model specified, using: %s", modelInput) + log.Debug().Msgf("[FCE] No model specified, using first available: %s", modelInput) } else { - log.Debug().Msgf("No model specified, returning error") - return "", fmt.Errorf("no model specified") + log.Warn().Msgf("[FCE] No model specified, none available") + return "", fmt.Errorf("[fce] no model specified, none available") } } // If a model is found in bearer token takes precedence if bearerExists { - log.Debug().Msgf("Using model from bearer token: %s", bearer) + log.Debug().Msgf("[FCE] Using model from bearer token: %s", bearer) modelInput = bearer } + + if modelInput == "" { + log.Warn().Msg("[FCE] modelInput is empty") + } return modelInput, nil } + +// TODO: Do we still need the first return value? 
+func (fce *FiberContextExtractor) OpenAIRequestFromContext(c *fiber.Ctx, firstModel bool) (string, *schema.OpenAIRequest, error) { + input := new(schema.OpenAIRequest) + + // Get input data from the request body + if err := c.BodyParser(input); err != nil { + return "", nil, fmt.Errorf("failed parsing request body: %w", err) + } + + received, _ := json.Marshal(input) + + ctx, cancel := context.WithCancel(fce.appConfig.Context) + input.Context = ctx + input.Cancel = cancel + + log.Debug().Msgf("Request received: %s", string(received)) + + var err error + input.Model, err = fce.ModelFromContext(c, input.Model, firstModel) + + return input.Model, input, err +} diff --git a/core/http/endpoints/elevenlabs/tts.go b/core/http/endpoints/elevenlabs/tts.go index 841f9b5f..4f5db463 100644 --- a/core/http/endpoints/elevenlabs/tts.go +++ b/core/http/endpoints/elevenlabs/tts.go @@ -2,9 +2,7 @@ package elevenlabs import ( "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" - "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/core/schema" "github.com/gofiber/fiber/v2" @@ -17,7 +15,7 @@ import ( // @Param request body schema.TTSRequest true "query params" // @Success 200 {string} binary "Response" // @Router /v1/text-to-speech/{voice-id} [post] -func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { +func TTSEndpoint(fce *fiberContext.FiberContextExtractor, ttsbs *backend.TextToSpeechBackendService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { input := new(schema.ElevenLabsTTSRequest) @@ -28,34 +26,21 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi return err } - modelFile, err := fiberContext.ModelFromContext(c, ml, input.ModelID, false) + var err error + input.ModelID, err = fce.ModelFromContext(c, input.ModelID, false) if err != nil { - modelFile = input.ModelID log.Warn().Msgf("Model not found in context: %s", input.ModelID) } - cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, - config.LoadOptionDebug(appConfig.Debug), - config.LoadOptionThreads(appConfig.Threads), - config.LoadOptionContextSize(appConfig.ContextSize), - config.LoadOptionF16(appConfig.F16), - ) - if err != nil { - modelFile = input.ModelID - log.Warn().Msgf("Model not found in context: %s", input.ModelID) - } else { - if input.ModelID != "" { - modelFile = input.ModelID - } else { - modelFile = cfg.Model - } + responseChannel := ttsbs.TextToAudioFile(&schema.TTSRequest{ + Model: input.ModelID, + Voice: voiceID, + Input: input.Text, + }) + rawValue := <-responseChannel + if rawValue.Error != nil { + return rawValue.Error } - log.Debug().Msgf("Request for model: %s", modelFile) - - filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, voiceID, ml, appConfig, *cfg) - if err != nil { - return err - } - return c.Download(filePath) + return c.Download(*rawValue.Value) } } diff --git a/core/http/endpoints/localai/backend_monitor.go b/core/http/endpoints/localai/backend_monitor.go index 8c7a664a..dac20388 100644 --- a/core/http/endpoints/localai/backend_monitor.go +++ b/core/http/endpoints/localai/backend_monitor.go @@ -6,7 +6,7 @@ import ( "github.com/gofiber/fiber/v2" ) -func BackendMonitorEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error { +func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error { return 
func(c *fiber.Ctx) error { input := new(schema.BackendMonitorRequest) @@ -23,7 +23,7 @@ func BackendMonitorEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error } } -func BackendShutdownEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error { +func BackendShutdownEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { input := new(schema.BackendMonitorRequest) // Get input data from the request body diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go index 7822e024..df7841fb 100644 --- a/core/http/endpoints/localai/tts.go +++ b/core/http/endpoints/localai/tts.go @@ -2,9 +2,7 @@ package localai import ( "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" - "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/core/schema" "github.com/gofiber/fiber/v2" @@ -16,45 +14,26 @@ import ( // @Param request body schema.TTSRequest true "query params" // @Success 200 {string} binary "Response" // @Router /v1/audio/speech [post] -func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { +func TTSEndpoint(fce *fiberContext.FiberContextExtractor, ttsbs *backend.TextToSpeechBackendService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - + var err error input := new(schema.TTSRequest) // Get input data from the request body - if err := c.BodyParser(input); err != nil { + if err = c.BodyParser(input); err != nil { return err } - modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false) + input.Model, err = fce.ModelFromContext(c, input.Model, false) if err != nil { - modelFile = input.Model log.Warn().Msgf("Model not found in context: %s", input.Model) } - cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, - config.LoadOptionDebug(appConfig.Debug), - config.LoadOptionThreads(appConfig.Threads), - config.LoadOptionContextSize(appConfig.ContextSize), - config.LoadOptionF16(appConfig.F16), - ) - - if err != nil { - modelFile = input.Model - log.Warn().Msgf("Model not found in context: %s", input.Model) - } else { - modelFile = cfg.Model + responseChannel := ttsbs.TextToAudioFile(input) + rawValue := <-responseChannel + if rawValue.Error != nil { + return rawValue.Error } - log.Debug().Msgf("Request for model: %s", modelFile) - - if input.Backend != "" { - cfg.Backend = input.Backend - } - - filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, input.Voice, ml, appConfig, *cfg) - if err != nil { - return err - } - return c.Download(filePath) + return c.Download(*rawValue.Value) } } diff --git a/core/http/endpoints/openai/assistant.go b/core/http/endpoints/openai/assistant.go index dceb3789..72cb8b4a 100644 --- a/core/http/endpoints/openai/assistant.go +++ b/core/http/endpoints/openai/assistant.go @@ -339,7 +339,7 @@ func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model } } - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find ")) + return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistantID %q", assistantID)) } } diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 36d1142b..a240b024 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -5,17 +5,11 @@ import ( "bytes" "encoding/json" "fmt" - "strings" 
- "time" - "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/grammar" - model "github.com/go-skynet/LocalAI/pkg/model" - "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/go-skynet/LocalAI/core/services" "github.com/gofiber/fiber/v2" - "github.com/google/uuid" "github.com/rs/zerolog/log" "github.com/valyala/fasthttp" ) @@ -25,412 +19,82 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/chat/completions [post] -func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error { - emptyMessage := "" - id := uuid.New().String() - created := int(time.Now().Unix()) - - process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { - initialMessage := schema.OpenAIResponse{ - ID: id, - Created: created, - Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}}, - Object: "chat.completion.chunk", - } - responses <- initialMessage - - ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { - resp := schema.OpenAIResponse{ - ID: id, - Created: created, - Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}}, - Object: "chat.completion.chunk", - Usage: schema.OpenAIUsage{ - PromptTokens: usage.Prompt, - CompletionTokens: usage.Completion, - TotalTokens: usage.Prompt + usage.Completion, - }, - } - - responses <- resp - return true - }) - close(responses) - } - processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { - result := "" - _, tokenUsage, _ := ComputeChoices(req, prompt, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { - result += s - // TODO: Change generated BNF grammar to be compliant with the schema so we can - // stream the result token by token here. - return true - }) - - results := parseFunctionCall(result, config.FunctionsConfig.ParallelCalls) - noActionToRun := len(results) > 0 && results[0].name == noAction - - switch { - case noActionToRun: - initialMessage := schema.OpenAIResponse{ - ID: id, - Created: created, - Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}}, - Object: "chat.completion.chunk", - } - responses <- initialMessage - - result, err := handleQuestion(config, req, ml, startupOptions, results[0].arguments, prompt) - if err != nil { - log.Error().Err(err).Msg("error handling question") - return - } - - resp := schema.OpenAIResponse{ - ID: id, - Created: created, - Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. 
- Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}}, - Object: "chat.completion.chunk", - Usage: schema.OpenAIUsage{ - PromptTokens: tokenUsage.Prompt, - CompletionTokens: tokenUsage.Completion, - TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, - }, - } - - responses <- resp - - default: - for i, ss := range results { - name, args := ss.name, ss.arguments - - initialMessage := schema.OpenAIResponse{ - ID: id, - Created: created, - Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{ - Delta: &schema.Message{ - Role: "assistant", - ToolCalls: []schema.ToolCall{ - { - Index: i, - ID: id, - Type: "function", - FunctionCall: schema.FunctionCall{ - Name: name, - }, - }, - }, - }}}, - Object: "chat.completion.chunk", - } - responses <- initialMessage - - responses <- schema.OpenAIResponse{ - ID: id, - Created: created, - Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{ - Delta: &schema.Message{ - Role: "assistant", - ToolCalls: []schema.ToolCall{ - { - Index: i, - ID: id, - Type: "function", - FunctionCall: schema.FunctionCall{ - Arguments: args, - }, - }, - }, - }}}, - Object: "chat.completion.chunk", - } - } - } - - close(responses) - } - +func ChatEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - processFunctions := false - funcs := grammar.Functions{} - modelFile, input, err := readRequest(c, ml, startupOptions, true) + _, request, err := fce.OpenAIRequestFromContext(c, false) if err != nil { - return fmt.Errorf("failed reading parameters from request:%w", err) + return fmt.Errorf("failed reading parameters from request: %w", err) } - config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, startupOptions.Debug, startupOptions.Threads, startupOptions.ContextSize, startupOptions.F16) + traceID, finalResultChannel, _, tokenChannel, err := oais.Chat(request, false, request.Stream) if err != nil { - return fmt.Errorf("failed reading parameters from request:%w", err) - } - log.Debug().Msgf("Configuration read: %+v", config) - - // Allow the user to set custom actions via config file - // to be "embedded" in each model - noActionName := "answer" - noActionDescription := "use this action to answer without performing any action" - - if config.FunctionsConfig.NoActionFunctionName != "" { - noActionName = config.FunctionsConfig.NoActionFunctionName - } - if config.FunctionsConfig.NoActionDescriptionName != "" { - noActionDescription = config.FunctionsConfig.NoActionDescriptionName + return err } - if input.ResponseFormat.Type == "json_object" { - input.Grammar = grammar.JSONBNF - } + if request.Stream { - config.Grammar = input.Grammar + log.Debug().Msgf("Chat Stream request received") - // process functions if we have any defined or if we have a function call string - if len(input.Functions) > 0 && config.ShouldUseFunctions() { - log.Debug().Msgf("Response needs to process functions") - - processFunctions = true - - noActionGrammar := grammar.Function{ - Name: noActionName, - Description: noActionDescription, - Parameters: map[string]interface{}{ - "properties": map[string]interface{}{ - "message": map[string]interface{}{ - "type": "string", - "description": "The message to reply the user with", - }}, - }, - } - - // Append the no action function - funcs = append(funcs, input.Functions...) 
- if !config.FunctionsConfig.DisableNoAction { - funcs = append(funcs, noActionGrammar) - } - - // Force picking one of the functions by the request - if config.FunctionToCall() != "" { - funcs = funcs.Select(config.FunctionToCall()) - } - - // Update input grammar - jsStruct := funcs.ToJSONStructure() - config.Grammar = jsStruct.Grammar("", config.FunctionsConfig.ParallelCalls) - } else if input.JSONFunctionGrammarObject != nil { - config.Grammar = input.JSONFunctionGrammarObject.Grammar("", config.FunctionsConfig.ParallelCalls) - } - - // functions are not supported in stream mode (yet?) - toStream := input.Stream - - log.Debug().Msgf("Parameters: %+v", config) - - var predInput string - - // If we are using the tokenizer template, we don't need to process the messages - // unless we are processing functions - if !config.TemplateConfig.UseTokenizerTemplate || processFunctions { - - suppressConfigSystemPrompt := false - mess := []string{} - for messageIndex, i := range input.Messages { - var content string - role := i.Role - - // if function call, we might want to customize the role so we can display better that the "assistant called a json action" - // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request - if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" { - roleFn := "assistant_function_call" - r := config.Roles[roleFn] - if r != "" { - role = roleFn - } - } - r := config.Roles[role] - contentExists := i.Content != nil && i.StringContent != "" - - fcall := i.FunctionCall - if len(i.ToolCalls) > 0 { - fcall = i.ToolCalls - } - - // First attempt to populate content via a chat message specific template - if config.TemplateConfig.ChatMessage != "" { - chatMessageData := model.ChatMessageTemplateData{ - SystemPrompt: config.SystemPrompt, - Role: r, - RoleName: role, - Content: i.StringContent, - FunctionCall: fcall, - FunctionName: i.Name, - LastMessage: messageIndex == (len(input.Messages) - 1), - Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)), - MessageIndex: messageIndex, - } - templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData) - if err != nil { - log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping") - } else { - if templatedChatMessage == "" { - log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData) - continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf - } - log.Debug().Msgf("templated message for chat: %s", templatedChatMessage) - content = templatedChatMessage - } - } - - marshalAnyRole := func(f any) { - j, err := json.Marshal(f) - if err == nil { - if contentExists { - content += "\n" + fmt.Sprint(r, " ", string(j)) - } else { - content = fmt.Sprint(r, " ", string(j)) - } - } - } - marshalAny := func(f any) { - j, err := json.Marshal(f) - if err == nil { - if contentExists { - content += "\n" + string(j) - } else { - content = string(j) - } - } - } - // If this model doesn't have such a template, or if that template fails to return a value, template at the message level. 
- if content == "" { - if r != "" { - if contentExists { - content = fmt.Sprint(r, i.StringContent) - } - - if i.FunctionCall != nil { - marshalAnyRole(i.FunctionCall) - } - if i.ToolCalls != nil { - marshalAnyRole(i.ToolCalls) - } - } else { - if contentExists { - content = fmt.Sprint(i.StringContent) - } - if i.FunctionCall != nil { - marshalAny(i.FunctionCall) - } - if i.ToolCalls != nil { - marshalAny(i.ToolCalls) - } - } - // Special Handling: System. We care if it was printed at all, not the r branch, so check seperately - if contentExists && role == "system" { - suppressConfigSystemPrompt = true - } - } - - mess = append(mess, content) - } - - predInput = strings.Join(mess, "\n") - log.Debug().Msgf("Prompt (before templating): %s", predInput) - - templateFile := "" - - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { - templateFile = config.Model - } - - if config.TemplateConfig.Chat != "" && !processFunctions { - templateFile = config.TemplateConfig.Chat - } - - if config.TemplateConfig.Functions != "" && processFunctions { - templateFile = config.TemplateConfig.Functions - } - - if templateFile != "" { - templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{ - SystemPrompt: config.SystemPrompt, - SuppressSystemPrompt: suppressConfigSystemPrompt, - Input: predInput, - Functions: funcs, - }) - if err == nil { - predInput = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", predInput) - } else { - log.Debug().Msgf("Template failed loading: %s", err.Error()) - } - } - - log.Debug().Msgf("Prompt (after templating): %s", predInput) - if processFunctions { - log.Debug().Msgf("Grammar: %+v", config.Grammar) - } - } - - switch { - case toStream: - - log.Debug().Msgf("Stream request received") c.Context().SetContentType("text/event-stream") //c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8) - // c.Set("Content-Type", "text/event-stream") + // c.Set("Cache-Control", "no-cache") c.Set("Connection", "keep-alive") c.Set("Transfer-Encoding", "chunked") - responses := make(chan schema.OpenAIResponse) - - if !processFunctions { - go process(predInput, input, config, ml, responses) - } else { - go processTools(noActionName, predInput, input, config, ml, responses) - } - c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { usage := &schema.OpenAIUsage{} toolsCalled := false - for ev := range responses { - usage = &ev.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it - if len(ev.Choices[0].Delta.ToolCalls) > 0 { + for ev := range tokenChannel { + if ev.Error != nil { + log.Debug().Err(ev.Error).Msg("chat streaming responseChannel error") + request.Cancel() + break + } + usage = &ev.Value.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it + + if len(ev.Value.Choices[0].Delta.ToolCalls) > 0 { toolsCalled = true } var buf bytes.Buffer enc := json.NewEncoder(&buf) - enc.Encode(ev) - log.Debug().Msgf("Sending chunk: %s", buf.String()) + if ev.Error != nil { + log.Debug().Err(ev.Error).Msg("[ChatEndpoint] error to debug during tokenChannel handler") + enc.Encode(ev.Error) + } else { + enc.Encode(ev.Value) + } + log.Debug().Msgf("chat streaming sending chunk: %s", buf.String()) _, err := fmt.Fprintf(w, "data: %v\n", buf.String()) if err != nil { - log.Debug().Msgf("Sending chunk failed: %v", err) - 
input.Cancel() + log.Debug().Err(err).Msgf("Sending chunk failed") + request.Cancel() + break + } + err = w.Flush() + if err != nil { + log.Debug().Msg("error while flushing, closing connection") + request.Cancel() break } - w.Flush() } finishReason := "stop" if toolsCalled { finishReason = "tool_calls" - } else if toolsCalled && len(input.Tools) == 0 { + } else if toolsCalled && len(request.Tools) == 0 { finishReason = "function_call" } resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. + ID: traceID.ID, + Created: traceID.Created, + Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: []schema.Choice{ { FinishReason: finishReason, Index: 0, - Delta: &schema.Message{Content: &emptyMessage}, + Delta: &schema.Message{Content: ""}, }}, Object: "chat.completion.chunk", Usage: *usage, @@ -441,202 +105,21 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup w.WriteString("data: [DONE]\n\n") w.Flush() })) + return nil - - // no streaming mode - default: - result, tokenUsage, err := ComputeChoices(input, predInput, config, startupOptions, ml, func(s string, c *[]schema.Choice) { - if !processFunctions { - // no function is called, just reply and use stop as finish reason - *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}}) - return - } - - results := parseFunctionCall(s, config.FunctionsConfig.ParallelCalls) - noActionsToRun := len(results) > 0 && results[0].name == noActionName - - switch { - case noActionsToRun: - result, err := handleQuestion(config, input, ml, startupOptions, results[0].arguments, predInput) - if err != nil { - log.Error().Err(err).Msg("error handling question") - return - } - *c = append(*c, schema.Choice{ - Message: &schema.Message{Role: "assistant", Content: &result}}) - default: - toolChoice := schema.Choice{ - Message: &schema.Message{ - Role: "assistant", - }, - } - - if len(input.Tools) > 0 { - toolChoice.FinishReason = "tool_calls" - } - - for _, ss := range results { - name, args := ss.name, ss.arguments - if len(input.Tools) > 0 { - // If we are using tools, we condense the function calls into - // a single response choice with all the tools - toolChoice.Message.ToolCalls = append(toolChoice.Message.ToolCalls, - schema.ToolCall{ - ID: id, - Type: "function", - FunctionCall: schema.FunctionCall{ - Name: name, - Arguments: args, - }, - }, - ) - } else { - // otherwise we return more choices directly - *c = append(*c, schema.Choice{ - FinishReason: "function_call", - Message: &schema.Message{ - Role: "assistant", - FunctionCall: map[string]interface{}{ - "name": name, - "arguments": args, - }, - }, - }) - } - } - - if len(input.Tools) > 0 { - // we need to append our result if we are using tools - *c = append(*c, toolChoice) - } - } - - }, nil) - if err != nil { - return err - } - - resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. 
- Choices: result, - Object: "chat.completion", - Usage: schema.OpenAIUsage{ - PromptTokens: tokenUsage.Prompt, - CompletionTokens: tokenUsage.Completion, - TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, - }, - } - respData, _ := json.Marshal(resp) - log.Debug().Msgf("Response: %s", respData) - - // Return the prediction in the response body - return c.JSON(resp) } + // TODO is this proper to have exclusive from Stream, or do we need to issue both responses? + rawResponse := <-finalResultChannel + + if rawResponse.Error != nil { + return rawResponse.Error + } + + jsonResult, _ := json.Marshal(rawResponse.Value) + log.Debug().Str("jsonResult", string(jsonResult)).Msg("Chat Final Response") + + // Return the prediction in the response body + return c.JSON(rawResponse.Value) } } - -func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, args, prompt string) (string, error) { - log.Debug().Msgf("nothing to do, computing a reply") - - // If there is a message that the LLM already sends as part of the JSON reply, use it - arguments := map[string]interface{}{} - json.Unmarshal([]byte(args), &arguments) - m, exists := arguments["message"] - if exists { - switch message := m.(type) { - case string: - if message != "" { - log.Debug().Msgf("Reply received from LLM: %s", message) - message = backend.Finetune(*config, prompt, message) - log.Debug().Msgf("Reply received from LLM(finetuned): %s", message) - - return message, nil - } - } - } - - log.Debug().Msgf("No action received from LLM, without a message, computing a reply") - // Otherwise ask the LLM to understand the JSON output and the context, and return a message - // Note: This costs (in term of CPU/GPU) another computation - config.Grammar = "" - images := []string{} - for _, m := range input.Messages { - images = append(images, m.StringImages...) - } - - predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, ml, *config, o, nil) - if err != nil { - log.Error().Err(err).Msg("model inference failed") - return "", err - } - - prediction, err := predFunc() - if err != nil { - log.Error().Err(err).Msg("prediction failed") - return "", err - } - return backend.Finetune(*config, prompt, prediction.Response), nil -} - -type funcCallResults struct { - name string - arguments string -} - -func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults { - results := []funcCallResults{} - - // TODO: use generics to avoid this code duplication - if multipleResults { - ss := []map[string]interface{}{} - s := utils.EscapeNewLines(llmresult) - json.Unmarshal([]byte(s), &ss) - log.Debug().Msgf("Function return: %s %+v", s, ss) - - for _, s := range ss { - func_name, ok := s["function"] - if !ok { - continue - } - args, ok := s["arguments"] - if !ok { - continue - } - d, _ := json.Marshal(args) - funcName, ok := func_name.(string) - if !ok { - continue - } - results = append(results, funcCallResults{name: funcName, arguments: string(d)}) - } - } else { - // As we have to change the result before processing, we can't stream the answer token-by-token (yet?) 
- ss := map[string]interface{}{} - // This prevent newlines to break JSON parsing for clients - s := utils.EscapeNewLines(llmresult) - json.Unmarshal([]byte(s), &ss) - log.Debug().Msgf("Function return: %s %+v", s, ss) - - // The grammar defines the function name as "function", while OpenAI returns "name" - func_name, ok := ss["function"] - if !ok { - return results - } - // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object - args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix) - if !ok { - return results - } - d, _ := json.Marshal(args) - funcName, ok := func_name.(string) - if !ok { - return results - } - results = append(results, funcCallResults{name: funcName, arguments: string(d)}) - } - - return results -} diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index 69923475..d8b412a9 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -4,18 +4,13 @@ import ( "bufio" "bytes" "encoding/json" - "errors" "fmt" - "time" - "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/core/services" "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/grammar" - model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" - "github.com/google/uuid" "github.com/rs/zerolog/log" "github.com/valyala/fasthttp" ) @@ -25,116 +20,50 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/completions [post] -func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { - id := uuid.New().String() - created := int(time.Now().Unix()) - - process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { - ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { - resp := schema.OpenAIResponse{ - ID: id, - Created: created, - Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. 
- Choices: []schema.Choice{ - { - Index: 0, - Text: s, - }, - }, - Object: "text_completion", - Usage: schema.OpenAIUsage{ - PromptTokens: usage.Prompt, - CompletionTokens: usage.Completion, - TotalTokens: usage.Prompt + usage.Completion, - }, - } - log.Debug().Msgf("Sending goroutine: %s", s) - - responses <- resp - return true - }) - close(responses) - } - +func CompletionEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - modelFile, input, err := readRequest(c, ml, appConfig, true) + _, request, err := fce.OpenAIRequestFromContext(c, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - log.Debug().Msgf("`input`: %+v", input) + log.Debug().Msgf("`OpenAIRequest`: %+v", request) - config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16) + traceID, finalResultChannel, _, _, tokenChannel, err := oais.Completion(request, false, request.Stream) if err != nil { - return fmt.Errorf("failed reading parameters from request:%w", err) + return err } - if input.ResponseFormat.Type == "json_object" { - input.Grammar = grammar.JSONBNF - } + if request.Stream { + log.Debug().Msgf("Completion Stream request received") - config.Grammar = input.Grammar - - log.Debug().Msgf("Parameter Config: %+v", config) - - if input.Stream { - log.Debug().Msgf("Stream request received") c.Context().SetContentType("text/event-stream") //c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8) //c.Set("Content-Type", "text/event-stream") c.Set("Cache-Control", "no-cache") c.Set("Connection", "keep-alive") c.Set("Transfer-Encoding", "chunked") - } - - templateFile := "" - - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { - templateFile = config.Model - } - - if config.TemplateConfig.Completion != "" { - templateFile = config.TemplateConfig.Completion - } - - if input.Stream { - if len(config.PromptStrings) > 1 { - return errors.New("cannot handle more than 1 `PromptStrings` when Streaming") - } - - predInput := config.PromptStrings[0] - - if templateFile != "" { - templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ - Input: predInput, - }) - if err == nil { - predInput = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", predInput) - } - } - - responses := make(chan schema.OpenAIResponse) - - go process(predInput, input, config, ml, responses) c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { - - for ev := range responses { + for ev := range tokenChannel { var buf bytes.Buffer enc := json.NewEncoder(&buf) - enc.Encode(ev) + if ev.Error != nil { + log.Debug().Msgf("[CompletionEndpoint] error to debug during tokenChannel handler: %q", ev.Error) + enc.Encode(ev.Error) + } else { + enc.Encode(ev.Value) + } - log.Debug().Msgf("Sending chunk: %s", buf.String()) + log.Debug().Msgf("completion streaming sending chunk: %s", buf.String()) fmt.Fprintf(w, "data: %v\n", buf.String()) w.Flush() } resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. + ID: traceID.ID, + Created: traceID.Created, + Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. 
Choices: []schema.Choice{ { Index: 0, @@ -151,55 +80,15 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a })) return nil } - - var result []schema.Choice - - totalTokenUsage := backend.TokenUsage{} - - for k, i := range config.PromptStrings { - if templateFile != "" { - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ - SystemPrompt: config.SystemPrompt, - Input: i, - }) - if err == nil { - i = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", i) - } - } - - r, tokenUsage, err := ComputeChoices( - input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) { - *c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k}) - }, nil) - if err != nil { - return err - } - - totalTokenUsage.Prompt += tokenUsage.Prompt - totalTokenUsage.Completion += tokenUsage.Completion - - result = append(result, r...) + // TODO is this proper to have exclusive from Stream, or do we need to issue both responses? + rawResponse := <-finalResultChannel + if rawResponse.Error != nil { + return rawResponse.Error } - - resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: result, - Object: "text_completion", - Usage: schema.OpenAIUsage{ - PromptTokens: totalTokenUsage.Prompt, - CompletionTokens: totalTokenUsage.Completion, - TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, - }, - } - - jsonResult, _ := json.Marshal(resp) + jsonResult, _ := json.Marshal(rawResponse.Value) log.Debug().Msgf("Response: %s", jsonResult) // Return the prediction in the response body - return c.JSON(resp) + return c.JSON(rawResponse.Value) } } diff --git a/core/http/endpoints/openai/edit.go b/core/http/endpoints/openai/edit.go index 25497095..a33050dd 100644 --- a/core/http/endpoints/openai/edit.go +++ b/core/http/endpoints/openai/edit.go @@ -3,92 +3,36 @@ package openai import ( "encoding/json" "fmt" - "time" - "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/core/services" - "github.com/go-skynet/LocalAI/core/schema" - model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" - "github.com/google/uuid" "github.com/rs/zerolog/log" ) -func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { +func EditEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - modelFile, input, err := readRequest(c, ml, appConfig, true) + _, request, err := fce.OpenAIRequestFromContext(c, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16) + _, finalResultChannel, _, _, _, err := oais.Edit(request, false, request.Stream) if err != nil { - return fmt.Errorf("failed reading parameters from request:%w", err) + return err } - log.Debug().Msgf("Parameter Config: %+v", config) - - templateFile := "" - - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if 
ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { - templateFile = config.Model + rawResponse := <-finalResultChannel + if rawResponse.Error != nil { + return rawResponse.Error } - if config.TemplateConfig.Edit != "" { - templateFile = config.TemplateConfig.Edit - } - - var result []schema.Choice - totalTokenUsage := backend.TokenUsage{} - - for _, i := range config.InputStrings { - if templateFile != "" { - templatedInput, err := ml.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{ - Input: i, - Instruction: input.Instruction, - SystemPrompt: config.SystemPrompt, - }) - if err == nil { - i = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", i) - } - } - - r, tokenUsage, err := ComputeChoices(input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) { - *c = append(*c, schema.Choice{Text: s}) - }, nil) - if err != nil { - return err - } - - totalTokenUsage.Prompt += tokenUsage.Prompt - totalTokenUsage.Completion += tokenUsage.Completion - - result = append(result, r...) - } - - id := uuid.New().String() - created := int(time.Now().Unix()) - resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: result, - Object: "edit", - Usage: schema.OpenAIUsage{ - PromptTokens: totalTokenUsage.Prompt, - CompletionTokens: totalTokenUsage.Completion, - TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, - }, - } - - jsonResult, _ := json.Marshal(resp) + jsonResult, _ := json.Marshal(rawResponse.Value) log.Debug().Msgf("Response: %s", jsonResult) // Return the prediction in the response body - return c.JSON(resp) + return c.JSON(rawResponse.Value) } } diff --git a/core/http/endpoints/openai/embeddings.go b/core/http/endpoints/openai/embeddings.go index eca34f79..be546991 100644 --- a/core/http/endpoints/openai/embeddings.go +++ b/core/http/endpoints/openai/embeddings.go @@ -3,14 +3,9 @@ package openai import ( "encoding/json" "fmt" - "time" "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/pkg/model" - - "github.com/go-skynet/LocalAI/core/schema" - "github.com/google/uuid" + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" @@ -21,63 +16,25 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/embeddings [post] -func EmbeddingsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { +func EmbeddingsEndpoint(fce *fiberContext.FiberContextExtractor, ebs *backend.EmbeddingsBackendService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - model, input, err := readRequest(c, ml, appConfig, true) + _, input, err := fce.OpenAIRequestFromContext(c, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - config, input, err := mergeRequestWithConfig(model, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16) - if err != nil { - return fmt.Errorf("failed reading parameters from request:%w", err) + responseChannel := ebs.Embeddings(input) + + rawResponse := <-responseChannel + + if rawResponse.Error != nil { + return rawResponse.Error } - log.Debug().Msgf("Parameter Config: %+v", config) - items := []schema.Item{} - - for i, s := 
range config.InputToken { - // get the model function to call for the result - embedFn, err := backend.ModelEmbedding("", s, ml, *config, appConfig) - if err != nil { - return err - } - - embeddings, err := embedFn() - if err != nil { - return err - } - items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"}) - } - - for i, s := range config.InputStrings { - // get the model function to call for the result - embedFn, err := backend.ModelEmbedding(s, []int{}, ml, *config, appConfig) - if err != nil { - return err - } - - embeddings, err := embedFn() - if err != nil { - return err - } - items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"}) - } - - id := uuid.New().String() - created := int(time.Now().Unix()) - resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. - Data: items, - Object: "list", - } - - jsonResult, _ := json.Marshal(resp) + jsonResult, _ := json.Marshal(rawResponse.Value) log.Debug().Msgf("Response: %s", jsonResult) // Return the prediction in the response body - return c.JSON(resp) + return c.JSON(rawResponse.Value) } } diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go index 9e806b3e..ec3d84da 100644 --- a/core/http/endpoints/openai/image.go +++ b/core/http/endpoints/openai/image.go @@ -1,50 +1,18 @@ package openai import ( - "bufio" - "encoding/base64" "encoding/json" "fmt" - "io" - "net/http" - "os" - "path/filepath" - "strconv" - "strings" - "time" - "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/schema" - "github.com/google/uuid" + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" "github.com/go-skynet/LocalAI/core/backend" - model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" ) -func downloadFile(url string) (string, error) { - // Get the data - resp, err := http.Get(url) - if err != nil { - return "", err - } - defer resp.Body.Close() - - // Create the file - out, err := os.CreateTemp("", "image") - if err != nil { - return "", err - } - defer out.Close() - - // Write the body to file - _, err = io.Copy(out, resp.Body) - return out.Name(), err -} - -// +// https://platform.openai.com/docs/api-reference/images/create /* * @@ -59,186 +27,36 @@ func downloadFile(url string) (string, error) { * */ + // ImageEndpoint is the OpenAI Image generation API endpoint https://platform.openai.com/docs/api-reference/images/create // @Summary Creates an image given a prompt. // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/images/generations [post] -func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { +func ImageEndpoint(fce *fiberContext.FiberContextExtractor, igbs *backend.ImageGenerationBackendService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - m, input, err := readRequest(c, ml, appConfig, false) + // TODO: Somewhat a hack. Is there a better place to assign this? 
+ if igbs.BaseUrlForGeneratedImages == "" { + igbs.BaseUrlForGeneratedImages = c.BaseURL() + "/generated-images/" + } + _, request, err := fce.OpenAIRequestFromContext(c, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - if m == "" { - m = model.StableDiffusionBackend - } - log.Debug().Msgf("Loading model: %+v", m) + responseChannel := igbs.GenerateImage(request) + rawResponse := <-responseChannel - config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, 0, 0, false) + if rawResponse.Error != nil { + return rawResponse.Error + } + + jsonResult, err := json.Marshal(rawResponse.Value) if err != nil { - return fmt.Errorf("failed reading parameters from request:%w", err) + return err } - - src := "" - if input.File != "" { - - fileData := []byte{} - // check if input.File is an URL, if so download it and save it - // to a temporary file - if strings.HasPrefix(input.File, "http://") || strings.HasPrefix(input.File, "https://") { - out, err := downloadFile(input.File) - if err != nil { - return fmt.Errorf("failed downloading file:%w", err) - } - defer os.RemoveAll(out) - - fileData, err = os.ReadFile(out) - if err != nil { - return fmt.Errorf("failed reading file:%w", err) - } - - } else { - // base 64 decode the file and write it somewhere - // that we will cleanup - fileData, err = base64.StdEncoding.DecodeString(input.File) - if err != nil { - return err - } - } - - // Create a temporary file - outputFile, err := os.CreateTemp(appConfig.ImageDir, "b64") - if err != nil { - return err - } - // write the base64 result - writer := bufio.NewWriter(outputFile) - _, err = writer.Write(fileData) - if err != nil { - outputFile.Close() - return err - } - outputFile.Close() - src = outputFile.Name() - defer os.RemoveAll(src) - } - - log.Debug().Msgf("Parameter Config: %+v", config) - - switch config.Backend { - case "stablediffusion": - config.Backend = model.StableDiffusionBackend - case "tinydream": - config.Backend = model.TinyDreamBackend - case "": - config.Backend = model.StableDiffusionBackend - } - - sizeParts := strings.Split(input.Size, "x") - if len(sizeParts) != 2 { - return fmt.Errorf("invalid value for 'size'") - } - width, err := strconv.Atoi(sizeParts[0]) - if err != nil { - return fmt.Errorf("invalid value for 'size'") - } - height, err := strconv.Atoi(sizeParts[1]) - if err != nil { - return fmt.Errorf("invalid value for 'size'") - } - - b64JSON := false - if input.ResponseFormat.Type == "b64_json" { - b64JSON = true - } - // src and clip_skip - var result []schema.Item - for _, i := range config.PromptStrings { - n := input.N - if input.N == 0 { - n = 1 - } - for j := 0; j < n; j++ { - prompts := strings.Split(i, "|") - positive_prompt := prompts[0] - negative_prompt := "" - if len(prompts) > 1 { - negative_prompt = prompts[1] - } - - mode := 0 - step := config.Step - if step == 0 { - step = 15 - } - - if input.Mode != 0 { - mode = input.Mode - } - - if input.Step != 0 { - step = input.Step - } - - tempDir := "" - if !b64JSON { - tempDir = appConfig.ImageDir - } - // Create a temporary file - outputFile, err := os.CreateTemp(tempDir, "b64") - if err != nil { - return err - } - outputFile.Close() - output := outputFile.Name() + ".png" - // Rename the temporary file - err = os.Rename(outputFile.Name(), output) - if err != nil { - return err - } - - baseURL := c.BaseURL() - - fn, err := backend.ImageGeneration(height, width, mode, step, *config.Seed, positive_prompt, negative_prompt, src, output, ml, *config, 
appConfig) - if err != nil { - return err - } - if err := fn(); err != nil { - return err - } - - item := &schema.Item{} - - if b64JSON { - defer os.RemoveAll(output) - data, err := os.ReadFile(output) - if err != nil { - return err - } - item.B64JSON = base64.StdEncoding.EncodeToString(data) - } else { - base := filepath.Base(output) - item.URL = baseURL + "/generated-images/" + base - } - - result = append(result, *item) - } - } - - id := uuid.New().String() - created := int(time.Now().Unix()) - resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Data: result, - } - - jsonResult, _ := json.Marshal(resp) log.Debug().Msgf("Response: %s", jsonResult) - // Return the prediction in the response body - return c.JSON(resp) + return c.JSON(rawResponse.Value) } } diff --git a/core/http/endpoints/openai/inference.go b/core/http/endpoints/openai/inference.go deleted file mode 100644 index 06e784b7..00000000 --- a/core/http/endpoints/openai/inference.go +++ /dev/null @@ -1,55 +0,0 @@ -package openai - -import ( - "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" - - "github.com/go-skynet/LocalAI/core/schema" - model "github.com/go-skynet/LocalAI/pkg/model" -) - -func ComputeChoices( - req *schema.OpenAIRequest, - predInput string, - config *config.BackendConfig, - o *config.ApplicationConfig, - loader *model.ModelLoader, - cb func(string, *[]schema.Choice), - tokenCallback func(string, backend.TokenUsage) bool) ([]schema.Choice, backend.TokenUsage, error) { - n := req.N // number of completions to return - result := []schema.Choice{} - - if n == 0 { - n = 1 - } - - images := []string{} - for _, m := range req.Messages { - images = append(images, m.StringImages...) - } - - // get the model function to call for the result - predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, loader, *config, o, tokenCallback) - if err != nil { - return result, backend.TokenUsage{}, err - } - - tokenUsage := backend.TokenUsage{} - - for i := 0; i < n; i++ { - prediction, err := predFunc() - if err != nil { - return result, backend.TokenUsage{}, err - } - - tokenUsage.Prompt += prediction.Usage.Prompt - tokenUsage.Completion += prediction.Usage.Completion - - finetunedResponse := backend.Finetune(*config, predInput, prediction.Response) - cb(finetunedResponse, &result) - - //result = append(result, Choice{Text: prediction}) - - } - return result, tokenUsage, err -} diff --git a/core/http/endpoints/openai/list.go b/core/http/endpoints/openai/list.go index 04e611a2..9bb2b2ca 100644 --- a/core/http/endpoints/openai/list.go +++ b/core/http/endpoints/openai/list.go @@ -1,61 +1,21 @@ package openai import ( - "regexp" - - "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" - model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/core/services" "github.com/gofiber/fiber/v2" ) -func ListModelsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error { +func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error { return func(c *fiber.Ctx) error { - models, err := ml.ListModels() - if err != nil { - return err - } - var mm map[string]interface{} = map[string]interface{}{} - - dataModels := []schema.OpenAIModel{} - - var filterFn func(name string) bool + // If blank, no filter is applied. 
filter := c.Query("filter") - - // If filter is not specified, do not filter the list by model name - if filter == "" { - filterFn = func(_ string) bool { return true } - } else { - // If filter _IS_ specified, we compile it to a regex which is used to create the filterFn - rxp, err := regexp.Compile(filter) - if err != nil { - return err - } - filterFn = func(name string) bool { - return rxp.MatchString(name) - } - } - // By default, exclude any loose files that are already referenced by a configuration file. excludeConfigured := c.QueryBool("excludeConfigured", true) - // Start with the known configurations - for _, c := range cl.GetAllBackendConfigs() { - if excludeConfigured { - mm[c.Model] = nil - } - - if filterFn(c.Name) { - dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"}) - } - } - - // Then iterate through the loose files: - for _, m := range models { - // And only adds them if they shouldn't be skipped. - if _, exists := mm[m]; !exists && filterFn(m) { - dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"}) - } + dataModels, err := lms.ListModels(filter, excludeConfigured) + if err != nil { + return err } return c.JSON(struct { diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go deleted file mode 100644 index 369fb0b8..00000000 --- a/core/http/endpoints/openai/request.go +++ /dev/null @@ -1,285 +0,0 @@ -package openai - -import ( - "context" - "encoding/base64" - "encoding/json" - "fmt" - "io" - "net/http" - "strings" - - "github.com/go-skynet/LocalAI/core/config" - fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" - "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/grammar" - model "github.com/go-skynet/LocalAI/pkg/model" - "github.com/gofiber/fiber/v2" - "github.com/rs/zerolog/log" -) - -func readRequest(c *fiber.Ctx, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) { - input := new(schema.OpenAIRequest) - - // Get input data from the request body - if err := c.BodyParser(input); err != nil { - return "", nil, fmt.Errorf("failed parsing request body: %w", err) - } - - received, _ := json.Marshal(input) - - ctx, cancel := context.WithCancel(o.Context) - input.Context = ctx - input.Cancel = cancel - - log.Debug().Msgf("Request received: %s", string(received)) - - modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, firstModel) - - return modelFile, input, err -} - -// this function check if the string is an URL, if it's an URL downloads the image in memory -// encodes it in base64 and returns the base64 string -func getBase64Image(s string) (string, error) { - if strings.HasPrefix(s, "http") { - // download the image - resp, err := http.Get(s) - if err != nil { - return "", err - } - defer resp.Body.Close() - - // read the image data into memory - data, err := io.ReadAll(resp.Body) - if err != nil { - return "", err - } - - // encode the image data in base64 - encoded := base64.StdEncoding.EncodeToString(data) - - // return the base64 string - return encoded, nil - } - - // if the string instead is prefixed with "data:image/jpeg;base64,", drop it - if strings.HasPrefix(s, "data:image/jpeg;base64,") { - return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil - } - return "", fmt.Errorf("not valid string") -} - -func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest) { - if input.Echo { - config.Echo = input.Echo - } - if input.TopK != nil { - 
config.TopK = input.TopK - } - if input.TopP != nil { - config.TopP = input.TopP - } - - if input.Backend != "" { - config.Backend = input.Backend - } - - if input.ClipSkip != 0 { - config.Diffusers.ClipSkip = input.ClipSkip - } - - if input.ModelBaseName != "" { - config.AutoGPTQ.ModelBaseName = input.ModelBaseName - } - - if input.NegativePromptScale != 0 { - config.NegativePromptScale = input.NegativePromptScale - } - - if input.UseFastTokenizer { - config.UseFastTokenizer = input.UseFastTokenizer - } - - if input.NegativePrompt != "" { - config.NegativePrompt = input.NegativePrompt - } - - if input.RopeFreqBase != 0 { - config.RopeFreqBase = input.RopeFreqBase - } - - if input.RopeFreqScale != 0 { - config.RopeFreqScale = input.RopeFreqScale - } - - if input.Grammar != "" { - config.Grammar = input.Grammar - } - - if input.Temperature != nil { - config.Temperature = input.Temperature - } - - if input.Maxtokens != nil { - config.Maxtokens = input.Maxtokens - } - - switch stop := input.Stop.(type) { - case string: - if stop != "" { - config.StopWords = append(config.StopWords, stop) - } - case []interface{}: - for _, pp := range stop { - if s, ok := pp.(string); ok { - config.StopWords = append(config.StopWords, s) - } - } - } - - if len(input.Tools) > 0 { - for _, tool := range input.Tools { - input.Functions = append(input.Functions, tool.Function) - } - } - - if input.ToolsChoice != nil { - var toolChoice grammar.Tool - - switch content := input.ToolsChoice.(type) { - case string: - _ = json.Unmarshal([]byte(content), &toolChoice) - case map[string]interface{}: - dat, _ := json.Marshal(content) - _ = json.Unmarshal(dat, &toolChoice) - } - input.FunctionCall = map[string]interface{}{ - "name": toolChoice.Function.Name, - } - } - - // Decode each request's message content - index := 0 - for i, m := range input.Messages { - switch content := m.Content.(type) { - case string: - input.Messages[i].StringContent = content - case []interface{}: - dat, _ := json.Marshal(content) - c := []schema.Content{} - json.Unmarshal(dat, &c) - for _, pp := range c { - if pp.Type == "text" { - input.Messages[i].StringContent = pp.Text - } else if pp.Type == "image_url" { - // Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64: - base64, err := getBase64Image(pp.ImageURL.URL) - if err == nil { - input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff - // set a placeholder for each image - input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + input.Messages[i].StringContent - index++ - } else { - fmt.Print("Failed encoding image", err) - } - } - } - } - } - - if input.RepeatPenalty != 0 { - config.RepeatPenalty = input.RepeatPenalty - } - - if input.FrequencyPenalty != 0 { - config.FrequencyPenalty = input.FrequencyPenalty - } - - if input.PresencePenalty != 0 { - config.PresencePenalty = input.PresencePenalty - } - - if input.Keep != 0 { - config.Keep = input.Keep - } - - if input.Batch != 0 { - config.Batch = input.Batch - } - - if input.IgnoreEOS { - config.IgnoreEOS = input.IgnoreEOS - } - - if input.Seed != nil { - config.Seed = input.Seed - } - - if input.TypicalP != nil { - config.TypicalP = input.TypicalP - } - - switch inputs := input.Input.(type) { - case string: - if inputs != "" { - config.InputStrings = append(config.InputStrings, inputs) - } - case []interface{}: - for _, pp := range inputs { - switch i := pp.(type) { - case string: - config.InputStrings = 
append(config.InputStrings, i) - case []interface{}: - tokens := []int{} - for _, ii := range i { - tokens = append(tokens, int(ii.(float64))) - } - config.InputToken = append(config.InputToken, tokens) - } - } - } - - // Can be either a string or an object - switch fnc := input.FunctionCall.(type) { - case string: - if fnc != "" { - config.SetFunctionCallString(fnc) - } - case map[string]interface{}: - var name string - n, exists := fnc["name"] - if exists { - nn, e := n.(string) - if e { - name = nn - } - } - config.SetFunctionCallNameString(name) - } - - switch p := input.Prompt.(type) { - case string: - config.PromptStrings = append(config.PromptStrings, p) - case []interface{}: - for _, pp := range p { - if s, ok := pp.(string); ok { - config.PromptStrings = append(config.PromptStrings, s) - } - } - } -} - -func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) { - cfg, err := cm.LoadBackendConfigFileByName(modelFile, loader.ModelPath, - config.LoadOptionDebug(debug), - config.LoadOptionThreads(threads), - config.LoadOptionContextSize(ctx), - config.LoadOptionF16(f16), - ) - - // Set the parameters for the language model prediction - updateRequestConfig(cfg, input) - - return cfg, input, err -} diff --git a/core/http/endpoints/openai/transcription.go b/core/http/endpoints/openai/transcription.go index c7dd39e7..572cec12 100644 --- a/core/http/endpoints/openai/transcription.go +++ b/core/http/endpoints/openai/transcription.go @@ -9,8 +9,7 @@ import ( "path/filepath" "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" - model "github.com/go-skynet/LocalAI/pkg/model" + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" @@ -23,17 +22,15 @@ import ( // @Param file formData file true "file" // @Success 200 {object} map[string]string "Response" // @Router /v1/audio/transcriptions [post] -func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { +func TranscriptEndpoint(fce *fiberContext.FiberContextExtractor, tbs *backend.TranscriptionBackendService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - m, input, err := readRequest(c, ml, appConfig, false) + _, request, err := fce.OpenAIRequestFromContext(c, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16) - if err != nil { - return fmt.Errorf("failed reading parameters from request:%w", err) - } + // TODO: Investigate this file copy stuff later - potentially belongs in service. 
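The transcription endpoint that follows illustrates the request flow this refactor converges on: extract the OpenAI request from the Fiber context, hand it to a backend service, then block on a single-result channel. A minimal sketch of that shape, reusing only names introduced by this patch; the wrapper function itself is illustrative, not part of the diff:

	func transcriptionSketch(fce *fiberContext.FiberContextExtractor, tbs *backend.TranscriptionBackendService) func(c *fiber.Ctx) error {
		return func(c *fiber.Ctx) error {
			_, request, err := fce.OpenAIRequestFromContext(c, false)
			if err != nil {
				return err
			}
			// each service call returns a channel carrying exactly one ErrorOr result
			rawResponse := <-tbs.Transcribe(request)
			if rawResponse.Error != nil {
				return rawResponse.Error
			}
			return c.Status(http.StatusOK).JSON(rawResponse.Value)
		}
	}
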
+ // retrieve the file data from the request file, err := c.FormFile("file") if err != nil { @@ -65,13 +62,16 @@ func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a log.Debug().Msgf("Audio file copied to: %+v", dst) - tr, err := backend.ModelTranscription(dst, input.Language, ml, *config, appConfig) - if err != nil { - return err - } + request.File = dst - log.Debug().Msgf("Trascribed: %+v", tr) + responseChannel := tbs.Transcribe(request) + rawResponse := <-responseChannel + + if rawResponse.Error != nil { + return rawResponse.Error + } + log.Debug().Msgf("Transcribed: %+v", rawResponse.Value) // TODO: handle different outputs here - return c.Status(http.StatusOK).JSON(tr) + return c.Status(http.StatusOK).JSON(rawResponse.Value) } } diff --git a/core/schema/whisper.go b/core/schema/transcription.go similarity index 90% rename from core/schema/whisper.go rename to core/schema/transcription.go index 41413c1f..fe1799fa 100644 --- a/core/schema/whisper.go +++ b/core/schema/transcription.go @@ -10,7 +10,7 @@ type Segment struct { Tokens []int `json:"tokens"` } -type Result struct { +type TranscriptionResult struct { Segments []Segment `json:"segments"` Text string `json:"text"` } diff --git a/core/services/backend_monitor.go b/core/services/backend_monitor.go index 979a67a3..a610432c 100644 --- a/core/services/backend_monitor.go +++ b/core/services/backend_monitor.go @@ -15,22 +15,22 @@ import ( gopsutil "github.com/shirou/gopsutil/v3/process" ) -type BackendMonitor struct { +type BackendMonitorService struct { configLoader *config.BackendConfigLoader modelLoader *model.ModelLoader options *config.ApplicationConfig // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name. 
} -func NewBackendMonitor(configLoader *config.BackendConfigLoader, modelLoader *model.ModelLoader, appConfig *config.ApplicationConfig) BackendMonitor { - return BackendMonitor{ +func NewBackendMonitorService(modelLoader *model.ModelLoader, configLoader *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *BackendMonitorService { + return &BackendMonitorService{ configLoader: configLoader, modelLoader: modelLoader, options: appConfig, } } -func (bm BackendMonitor) getModelLoaderIDFromModelName(modelName string) (string, error) { - config, exists := bm.configLoader.GetBackendConfig(modelName) +func (bms BackendMonitorService) getModelLoaderIDFromModelName(modelName string) (string, error) { + config, exists := bms.configLoader.GetBackendConfig(modelName) var backendId string if exists { backendId = config.Model @@ -46,8 +46,8 @@ func (bm BackendMonitor) getModelLoaderIDFromModelName(modelName string) (string return backendId, nil } -func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) { - config, exists := bm.configLoader.GetBackendConfig(model) +func (bms *BackendMonitorService) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) { + config, exists := bms.configLoader.GetBackendConfig(model) var backend string if exists { backend = config.Model @@ -60,7 +60,7 @@ func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.Backe backend = fmt.Sprintf("%s.bin", backend) } - pid, err := bm.modelLoader.GetGRPCPID(backend) + pid, err := bms.modelLoader.GetGRPCPID(backend) if err != nil { log.Error().Err(err).Str("model", model).Msg("failed to find GRPC pid") @@ -101,12 +101,12 @@ func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.Backe }, nil } -func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse, error) { - backendId, err := bm.getModelLoaderIDFromModelName(modelName) +func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.StatusResponse, error) { + backendId, err := bms.getModelLoaderIDFromModelName(modelName) if err != nil { return nil, err } - modelAddr := bm.modelLoader.CheckIsLoaded(backendId) + modelAddr := bms.modelLoader.CheckIsLoaded(backendId) if modelAddr == "" { return nil, fmt.Errorf("backend %s is not currently loaded", backendId) } @@ -114,7 +114,7 @@ func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse status, rpcErr := modelAddr.GRPC(false, nil).Status(context.TODO()) if rpcErr != nil { log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error()) - val, slbErr := bm.SampleLocalBackendProcess(backendId) + val, slbErr := bms.SampleLocalBackendProcess(backendId) if slbErr != nil { return nil, fmt.Errorf("backend %s experienced an error retrieving status info via rpc: %s, then failed local node process sample: %s", backendId, rpcErr.Error(), slbErr.Error()) } @@ -131,10 +131,10 @@ func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse return status, nil } -func (bm BackendMonitor) ShutdownModel(modelName string) error { - backendId, err := bm.getModelLoaderIDFromModelName(modelName) +func (bms BackendMonitorService) ShutdownModel(modelName string) error { + backendId, err := bms.getModelLoaderIDFromModelName(modelName) if err != nil { return err } - return bm.modelLoader.ShutdownModel(backendId) + return bms.modelLoader.ShutdownModel(backendId) } diff --git a/core/services/gallery.go 
b/core/services/gallery.go index b068abbb..1ef8e3e2 100644 --- a/core/services/gallery.go +++ b/core/services/gallery.go @@ -3,14 +3,18 @@ package services import ( "context" "encoding/json" + "errors" "os" + "path/filepath" "strings" "sync" "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/embedded" + "github.com/go-skynet/LocalAI/pkg/downloader" "github.com/go-skynet/LocalAI/pkg/gallery" - "github.com/go-skynet/LocalAI/pkg/startup" "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/rs/zerolog/log" "gopkg.in/yaml.v2" ) @@ -29,18 +33,6 @@ func NewGalleryService(modelPath string) *GalleryService { } } -func prepareModel(modelPath string, req gallery.GalleryModel, cl *config.BackendConfigLoader, downloadStatus func(string, string, string, float64)) error { - - config, err := gallery.GetGalleryConfigFromURL(req.URL) - if err != nil { - return err - } - - config.Files = append(config.Files, req.AdditionalFiles...) - - return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus) -} - func (g *GalleryService) UpdateStatus(s string, op *gallery.GalleryOpStatus) { g.Lock() defer g.Unlock() @@ -92,10 +84,10 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader err = gallery.InstallModelFromGalleryByName(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback) } } else if op.ConfigURL != "" { - startup.PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL) + PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL) err = cl.Preload(g.modelPath) } else { - err = prepareModel(g.modelPath, op.Req, cl, progressCallback) + err = prepareModel(g.modelPath, op.Req, progressCallback) } if err != nil { @@ -127,13 +119,12 @@ type galleryModel struct { ID string `json:"id"` } -func processRequests(modelPath, s string, cm *config.BackendConfigLoader, galleries []gallery.Gallery, requests []galleryModel) error { +func processRequests(modelPath string, galleries []gallery.Gallery, requests []galleryModel) error { var err error for _, r := range requests { utils.ResetDownloadTimers() if r.ID == "" { - err = prepareModel(modelPath, r.GalleryModel, cm, utils.DisplayDownloadFunction) - + err = prepareModel(modelPath, r.GalleryModel, utils.DisplayDownloadFunction) } else { if strings.Contains(r.ID, "@") { err = gallery.InstallModelFromGallery( @@ -158,7 +149,7 @@ func ApplyGalleryFromFile(modelPath, s string, cl *config.BackendConfigLoader, g return err } - return processRequests(modelPath, s, cl, galleries, requests) + return processRequests(modelPath, galleries, requests) } func ApplyGalleryFromString(modelPath, s string, cl *config.BackendConfigLoader, galleries []gallery.Gallery) error { @@ -168,5 +159,90 @@ func ApplyGalleryFromString(modelPath, s string, cl *config.BackendConfigLoader, return err } - return processRequests(modelPath, s, cl, galleries, requests) + return processRequests(modelPath, galleries, requests) +} + +// PreloadModelsConfigurations will preload models from the given list of URLs +// It will download the model if it is not already present in the model path +// It will also try to resolve if the model is an embedded model YAML configuration +func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, models ...string) { + for _, url := range models { + + // As a best effort, try to resolve the model from the remote library + // if it's not resolved we try with the other method below + if modelLibraryURL != "" { + lib, err := 
embedded.GetRemoteLibraryShorteners(modelLibraryURL)
+			if err == nil {
+				if lib[url] != "" {
+					log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url])
+					url = lib[url]
+				}
+			}
+		}
+
+		url = embedded.ModelShortURL(url)
+		switch {
+		case embedded.ExistsInModelsLibrary(url):
+			modelYAML, err := embedded.ResolveContent(url)
+			// If we resolve something, just save it to disk and continue
+			if err != nil {
+				log.Error().Err(err).Msg("error resolving model content")
+				continue
+			}
+
+			log.Debug().Msgf("[startup] resolved embedded model: %s", url)
+			md5Name := utils.MD5(url)
+			modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+			if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
+				log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
+			}
+		case downloader.LooksLikeURL(url):
+			log.Debug().Msgf("[startup] resolved model to download: %s", url)
+
+			// md5 of model name
+			md5Name := utils.MD5(url)
+
+			// check if file exists
+			if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
+				modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+				err := downloader.DownloadFile(url, modelDefinitionFilePath, "", func(fileName, current, total string, percent float64) {
+					utils.DisplayDownloadFunction(fileName, current, total, percent)
+				})
+				if err != nil {
+					log.Error().Err(err).Str("url", url).Str("filepath", modelDefinitionFilePath).Msg("error downloading model")
+				}
+			}
+		default:
+			if _, err := os.Stat(url); err == nil {
+				log.Debug().Msgf("[startup] resolved local model: %s", url)
+				// copy to modelPath
+				md5Name := utils.MD5(url)
+
+				modelYAML, err := os.ReadFile(url)
+				if err != nil {
+					log.Error().Err(err).Str("filepath", url).Msg("error reading model definition")
+					continue
+				}
+
+				modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+				if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
+					log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
+				}
+			} else {
+				log.Warn().Msgf("[startup] failed resolving model '%s'", url)
+			}
+		}
+	}
+}
+
+func prepareModel(modelPath string, req gallery.GalleryModel, downloadStatus func(string, string, string, float64)) error {
+
+	config, err := gallery.GetGalleryConfigFromURL(req.URL)
+	if err != nil {
+		return err
+	}
+
+	config.Files = append(config.Files, req.AdditionalFiles...)
+ + return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus) } diff --git a/core/services/list_models.go b/core/services/list_models.go new file mode 100644 index 00000000..a21e6faf --- /dev/null +++ b/core/services/list_models.go @@ -0,0 +1,72 @@ +package services + +import ( + "regexp" + + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/model" +) + +type ListModelsService struct { + bcl *config.BackendConfigLoader + ml *model.ModelLoader + appConfig *config.ApplicationConfig +} + +func NewListModelsService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *ListModelsService { + return &ListModelsService{ + bcl: bcl, + ml: ml, + appConfig: appConfig, + } +} + +func (lms *ListModelsService) ListModels(filter string, excludeConfigured bool) ([]schema.OpenAIModel, error) { + + models, err := lms.ml.ListModels() + if err != nil { + return nil, err + } + + var mm map[string]interface{} = map[string]interface{}{} + + dataModels := []schema.OpenAIModel{} + + var filterFn func(name string) bool + + // If filter is not specified, do not filter the list by model name + if filter == "" { + filterFn = func(_ string) bool { return true } + } else { + // If filter _IS_ specified, we compile it to a regex which is used to create the filterFn + rxp, err := regexp.Compile(filter) + if err != nil { + return nil, err + } + filterFn = func(name string) bool { + return rxp.MatchString(name) + } + } + + // Start with the known configurations + for _, c := range lms.bcl.GetAllBackendConfigs() { + if excludeConfigured { + mm[c.Model] = nil + } + + if filterFn(c.Name) { + dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"}) + } + } + + // Then iterate through the loose files: + for _, m := range models { + // And only adds them if they shouldn't be skipped. + if _, exists := mm[m]; !exists && filterFn(m) { + dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"}) + } + } + + return dataModels, nil +} diff --git a/pkg/startup/model_preload_test.go b/core/services/model_preload_test.go similarity index 96% rename from pkg/startup/model_preload_test.go rename to core/services/model_preload_test.go index 63a8f8b0..fc65d565 100644 --- a/pkg/startup/model_preload_test.go +++ b/core/services/model_preload_test.go @@ -1,13 +1,14 @@ -package startup_test +package services_test import ( "fmt" "os" "path/filepath" - . "github.com/go-skynet/LocalAI/pkg/startup" "github.com/go-skynet/LocalAI/pkg/utils" + . "github.com/go-skynet/LocalAI/core/services" + . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" ) diff --git a/core/services/openai.go b/core/services/openai.go new file mode 100644 index 00000000..0f61d6f4 --- /dev/null +++ b/core/services/openai.go @@ -0,0 +1,805 @@ +package services + +import ( + "encoding/json" + "errors" + "fmt" + "strings" + "sync" + "time" + + "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/concurrency" + "github.com/go-skynet/LocalAI/pkg/grammar" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/google/uuid" + "github.com/imdario/mergo" + "github.com/rs/zerolog/log" +) + +type endpointGenerationConfigurationFn func(bc *config.BackendConfig, request *schema.OpenAIRequest) endpointConfiguration + +type endpointConfiguration struct { + SchemaObject string + TemplatePath string + TemplateData model.PromptTemplateData + ResultMappingFn func(resp *backend.LLMResponse, index int) schema.Choice + CompletionMappingFn func(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] + TokenMappingFn func(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] +} + +// TODO: This is used for completion and edit. I am pretty sure I forgot parts, but fix it later. +func simpleMapper(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] { + if resp.Error != nil || resp.Value == nil { + return concurrency.ErrorOr[*schema.OpenAIResponse]{Error: resp.Error} + } + return concurrency.ErrorOr[*schema.OpenAIResponse]{ + Value: &schema.OpenAIResponse{ + Choices: []schema.Choice{ + { + Text: resp.Value.Response, + }, + }, + Usage: schema.OpenAIUsage{ + PromptTokens: resp.Value.Usage.Prompt, + CompletionTokens: resp.Value.Usage.Completion, + TotalTokens: resp.Value.Usage.Prompt + resp.Value.Usage.Completion, + }, + }, + } +} + +// TODO: Consider alternative names for this. +// The purpose of this struct is to hold a reference to the OpenAI request context information +// This keeps things simple within core/services/openai.go and allows consumers to "see" this information if they need it +type OpenAIRequestTraceID struct { + ID string + Created int +} + +// This type split out from core/backend/llm.go - I'm still not _totally_ sure about this, but it seems to make sense to keep the generic LLM code from the OpenAI specific higher level functionality +type OpenAIService struct { + bcl *config.BackendConfigLoader + ml *model.ModelLoader + appConfig *config.ApplicationConfig + llmbs *backend.LLMBackendService +} + +func NewOpenAIService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig, llmbs *backend.LLMBackendService) *OpenAIService { + return &OpenAIService{ + bcl: bcl, + ml: ml, + appConfig: appConfig, + llmbs: llmbs, + } +} + +// Keeping in place as a reminder to POTENTIALLY ADD MORE VALIDATION HERE??? +func (oais *OpenAIService) getConfig(request *schema.OpenAIRequest) (*config.BackendConfig, *schema.OpenAIRequest, error) { + return oais.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, oais.appConfig) +} + +// TODO: It would be a lot less messy to make a return struct that had references to each of these channels +// INTENTIONALLY not doing that quite yet - I believe we need to let the references to unused channels die for the GC to automatically collect -- can we manually free()? 
+// finalResultsChannel is the primary async return path: one result for the entire request. +// promptResultsChannels is DUBIOUS. It's expected to be raw fan-out used within the function itself, but I am exposing for testing? One bundle of LLMResponseBundle per PromptString? Gets all N completions for a single prompt. +// completionsChannel is a channel that emits one *LLMResponse per generated completion, be that different prompts or N. Seems the most useful other than "entire request" Request is available to attempt tracing??? +// tokensChannel is a channel that emits one *LLMResponse per generated token. Let's see what happens! +func (oais *OpenAIService) Completion(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool) ( + traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], promptResultsChannels []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle], + completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) { + + return oais.GenerateTextFromRequest(request, func(bc *config.BackendConfig, request *schema.OpenAIRequest) endpointConfiguration { + return endpointConfiguration{ + SchemaObject: "text_completion", + TemplatePath: bc.TemplateConfig.Completion, + TemplateData: model.PromptTemplateData{ + SystemPrompt: bc.SystemPrompt, + }, + ResultMappingFn: func(resp *backend.LLMResponse, promptIndex int) schema.Choice { + return schema.Choice{ + Index: promptIndex, + FinishReason: "stop", + Text: resp.Response, + } + }, + CompletionMappingFn: simpleMapper, + TokenMappingFn: simpleMapper, + } + }, notifyOnPromptResult, notifyOnToken, nil) +} + +func (oais *OpenAIService) Edit(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool) ( + traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], promptResultsChannels []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle], + completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) { + + return oais.GenerateTextFromRequest(request, func(bc *config.BackendConfig, request *schema.OpenAIRequest) endpointConfiguration { + + return endpointConfiguration{ + SchemaObject: "edit", + TemplatePath: bc.TemplateConfig.Edit, + TemplateData: model.PromptTemplateData{ + SystemPrompt: bc.SystemPrompt, + Instruction: request.Instruction, + }, + ResultMappingFn: func(resp *backend.LLMResponse, promptIndex int) schema.Choice { + return schema.Choice{ + Index: promptIndex, + FinishReason: "stop", + Text: resp.Response, + } + }, + CompletionMappingFn: simpleMapper, + TokenMappingFn: simpleMapper, + } + }, notifyOnPromptResult, notifyOnToken, nil) +} + +func (oais *OpenAIService) Chat(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool) ( + traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], + completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) { + + return oais.GenerateFromMultipleMessagesChatRequest(request, notifyOnPromptResult, notifyOnToken, nil) +} + +func (oais *OpenAIService) GenerateTextFromRequest(request *schema.OpenAIRequest, endpointConfigFn endpointGenerationConfigurationFn, notifyOnPromptResult bool, notifyOnToken bool, initialTraceID *OpenAIRequestTraceID) ( + traceID 
*OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], promptResultsChannels []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle],
+	completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) {
+
+	if initialTraceID == nil {
+		traceID = &OpenAIRequestTraceID{
+			ID:      uuid.New().String(),
+			Created: int(time.Now().Unix()),
+		}
+	} else {
+		traceID = initialTraceID
+	}
+
+	bc, request, err := oais.getConfig(request)
+	if err != nil {
+		log.Error().Msgf("[oais::GenerateTextFromRequest] error getting configuration: %q", err)
+		return
+	}
+
+	if request.ResponseFormat.Type == "json_object" {
+		request.Grammar = grammar.JSONBNF
+	}
+
+	bc.Grammar = request.Grammar
+
+	if request.Stream && len(bc.PromptStrings) > 1 {
+		log.Warn().Msg("potentially cannot handle more than 1 `PromptStrings` when Streaming?")
+	}
+
+	rawFinalResultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+	finalResultChannel = rawFinalResultChannel
+	promptResultsChannels = []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle]{}
+	var rawCompletionsChannel chan concurrency.ErrorOr[*schema.OpenAIResponse]
+	var rawTokenChannel chan concurrency.ErrorOr[*schema.OpenAIResponse]
+	if notifyOnPromptResult {
+		rawCompletionsChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+	}
+	if notifyOnToken {
+		rawTokenChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+	}
+
+	promptResultsChannelLock := sync.Mutex{}
+
+	endpointConfig := endpointConfigFn(bc, request)
+
+	if len(endpointConfig.TemplatePath) == 0 {
+		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+		if oais.ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", bc.Model)) {
+			endpointConfig.TemplatePath = bc.Model
+		} else {
+			log.Warn().Msgf("failed to find any template for %+v", request)
+		}
+	}
+
+	setupWG := sync.WaitGroup{}
+	var prompts []string
+	if lPS := len(bc.PromptStrings); lPS > 0 {
+		setupWG.Add(lPS)
+		prompts = bc.PromptStrings
+	} else {
+		setupWG.Add(len(bc.InputStrings))
+		prompts = bc.InputStrings
+	}
+
+	var setupError error = nil
+
+	for pI, p := range prompts {
+
+		go func(promptIndex int, prompt string) {
+			if endpointConfig.TemplatePath != "" {
+				promptTemplateData := model.PromptTemplateData{
+					Input: prompt,
+				}
+				err := mergo.Merge(&promptTemplateData, endpointConfig.TemplateData, mergo.WithOverride)
+				if err == nil {
+					templatedInput, err := oais.ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, endpointConfig.TemplatePath, promptTemplateData)
+					if err == nil {
+						prompt = templatedInput
+						log.Debug().Msgf("Template found, input modified to: %s", prompt)
+					}
+				}
+			}
+
+			log.Debug().Msgf("[OAIS GenerateTextFromRequest] Prompt: %q", prompt)
+			promptResultsChannel, completionChannels, tokenChannels, err := oais.llmbs.GenerateText(prompt, request, bc,
+				func(r *backend.LLMResponse) schema.Choice {
+					return endpointConfig.ResultMappingFn(r, promptIndex)
+				}, notifyOnPromptResult, notifyOnToken)
+			if err != nil {
+				log.Error().Msgf("Unable to generate text prompt: %q\nerr: %q", prompt, err)
+				promptResultsChannelLock.Lock()
+				setupError = errors.Join(setupError, err)
+				promptResultsChannelLock.Unlock()
+				setupWG.Done()
+				return
+			}
+			if notifyOnPromptResult {
+				concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(completionChannels, endpointConfig.CompletionMappingFn), rawCompletionsChannel, true)
+			}
+			if
notifyOnToken { + concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(tokenChannels, endpointConfig.TokenMappingFn), rawTokenChannel, true) + } + promptResultsChannelLock.Lock() + promptResultsChannels = append(promptResultsChannels, promptResultsChannel) + promptResultsChannelLock.Unlock() + setupWG.Done() + }(pI, p) + + } + setupWG.Wait() + + // If any of the setup goroutines experienced an error, quit early here. + if setupError != nil { + go func() { + log.Error().Msgf("[OAIS GenerateTextFromRequest] caught an error during setup: %q", setupError) + rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: setupError} + close(rawFinalResultChannel) + }() + return + } + + initialResponse := &schema.OpenAIResponse{ + ID: traceID.ID, + Created: traceID.Created, + Model: request.Model, + Object: endpointConfig.SchemaObject, + Usage: schema.OpenAIUsage{}, + } + + // utils.SliceOfChannelsRawMerger[[]schema.Choice](promptResultsChannels, rawFinalResultChannel, func(results []schema.Choice) (*schema.OpenAIResponse, error) { + concurrency.SliceOfChannelsReducer( + promptResultsChannels, rawFinalResultChannel, + func(iv concurrency.ErrorOr[*backend.LLMResponseBundle], result concurrency.ErrorOr[*schema.OpenAIResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] { + + if iv.Error != nil { + result.Error = iv.Error + return result + } + result.Value.Usage.PromptTokens += iv.Value.Usage.Prompt + result.Value.Usage.CompletionTokens += iv.Value.Usage.Completion + result.Value.Usage.TotalTokens = result.Value.Usage.PromptTokens + result.Value.Usage.CompletionTokens + + result.Value.Choices = append(result.Value.Choices, iv.Value.Response...) + + return result + }, concurrency.ErrorOr[*schema.OpenAIResponse]{Value: initialResponse}, true) + + completionsChannel = rawCompletionsChannel + tokenChannel = rawTokenChannel + + return +} + +// TODO: For porting sanity, this is distinct from GenerateTextFromRequest and is _currently_ specific to Chat purposes +// this is not a final decision -- just a reality of moving a lot of parts at once +// / This has _become_ Chat which wasn't the goal... More cleanup in the future once it's stable? 
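Before the chat-specific variant below, the multi-channel contract is easiest to see from the consumer side. A sketch of draining a non-streaming completion, assuming only the final result channel is wanted (the other channels are deliberately discarded so their references can die, per the discussion above); the wrapper is illustrative, not part of the diff:

	func completionSketch(oais *services.OpenAIService, request *schema.OpenAIRequest) (*schema.OpenAIResponse, error) {
		_, finalCh, _, _, _, err := oais.Completion(request, false, false)
		if err != nil {
			return nil, err
		}
		res := <-finalCh // one ErrorOr[*schema.OpenAIResponse] for the entire request
		if res.Error != nil {
			return nil, res.Error
		}
		return res.Value, nil
	}
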
+func (oais *OpenAIService) GenerateFromMultipleMessagesChatRequest(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool, initialTraceID *OpenAIRequestTraceID) ( + traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], + completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) { + + if initialTraceID == nil { + traceID = &OpenAIRequestTraceID{ + ID: uuid.New().String(), + Created: int(time.Now().Unix()), + } + } else { + traceID = initialTraceID + } + + bc, request, err := oais.getConfig(request) + if err != nil { + return + } + + // Allow the user to set custom actions via config file + // to be "embedded" in each model + noActionName := "answer" + noActionDescription := "use this action to answer without performing any action" + + if bc.FunctionsConfig.NoActionFunctionName != "" { + noActionName = bc.FunctionsConfig.NoActionFunctionName + } + if bc.FunctionsConfig.NoActionDescriptionName != "" { + noActionDescription = bc.FunctionsConfig.NoActionDescriptionName + } + + if request.ResponseFormat.Type == "json_object" { + request.Grammar = grammar.JSONBNF + } + + bc.Grammar = request.Grammar + + processFunctions := false + funcs := grammar.Functions{} + // process functions if we have any defined or if we have a function call string + if len(request.Functions) > 0 && bc.ShouldUseFunctions() { + log.Debug().Msgf("Response needs to process functions") + + processFunctions = true + + noActionGrammar := grammar.Function{ + Name: noActionName, + Description: noActionDescription, + Parameters: map[string]interface{}{ + "properties": map[string]interface{}{ + "message": map[string]interface{}{ + "type": "string", + "description": "The message to reply the user with", + }}, + }, + } + + // Append the no action function + funcs = append(funcs, request.Functions...) 
+ if !bc.FunctionsConfig.DisableNoAction { + funcs = append(funcs, noActionGrammar) + } + + // Force picking one of the functions by the request + if bc.FunctionToCall() != "" { + funcs = funcs.Select(bc.FunctionToCall()) + } + + // Update input grammar + jsStruct := funcs.ToJSONStructure() + bc.Grammar = jsStruct.Grammar("", bc.FunctionsConfig.ParallelCalls) + } else if request.JSONFunctionGrammarObject != nil { + bc.Grammar = request.JSONFunctionGrammarObject.Grammar("", bc.FunctionsConfig.ParallelCalls) + } + + if request.Stream && processFunctions { + log.Warn().Msg("Streaming + Functions is highly experimental in this version") + } + + var predInput string + + if !bc.TemplateConfig.UseTokenizerTemplate || processFunctions { + + suppressConfigSystemPrompt := false + mess := []string{} + for messageIndex, i := range request.Messages { + var content string + role := i.Role + + // if function call, we might want to customize the role so we can display better that the "assistant called a json action" + // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request + if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" { + roleFn := "assistant_function_call" + r := bc.Roles[roleFn] + if r != "" { + role = roleFn + } + } + r := bc.Roles[role] + contentExists := i.Content != nil && i.StringContent != "" + + fcall := i.FunctionCall + if len(i.ToolCalls) > 0 { + fcall = i.ToolCalls + } + + // First attempt to populate content via a chat message specific template + if bc.TemplateConfig.ChatMessage != "" { + chatMessageData := model.ChatMessageTemplateData{ + SystemPrompt: bc.SystemPrompt, + Role: r, + RoleName: role, + Content: i.StringContent, + FunctionCall: fcall, + FunctionName: i.Name, + LastMessage: messageIndex == (len(request.Messages) - 1), + Function: bc.Grammar != "" && (messageIndex == (len(request.Messages) - 1)), + MessageIndex: messageIndex, + } + templatedChatMessage, err := oais.ml.EvaluateTemplateForChatMessage(bc.TemplateConfig.ChatMessage, chatMessageData) + if err != nil { + log.Error().Msgf("error processing message %+v using template \"%s\": %v. Skipping!", chatMessageData, bc.TemplateConfig.ChatMessage, err) + } else { + if templatedChatMessage == "" { + log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", bc.TemplateConfig.ChatMessage, chatMessageData) + continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf + } + log.Debug().Msgf("templated message for chat: %s", templatedChatMessage) + content = templatedChatMessage + } + } + marshalAnyRole := func(f any) { + j, err := json.Marshal(f) + if err == nil { + if contentExists { + content += "\n" + fmt.Sprint(r, " ", string(j)) + } else { + content = fmt.Sprint(r, " ", string(j)) + } + } + } + marshalAny := func(f any) { + j, err := json.Marshal(f) + if err == nil { + if contentExists { + content += "\n" + string(j) + } else { + content = string(j) + } + } + } + // If this model doesn't have such a template, or if that template fails to return a value, template at the message level. 
+			if content == "" {
+				if r != "" {
+					if contentExists {
+						content = fmt.Sprint(r, i.StringContent)
+					}
+
+					if i.FunctionCall != nil {
+						marshalAnyRole(i.FunctionCall)
+					}
+				} else {
+					if contentExists {
+						content = fmt.Sprint(i.StringContent)
+					}
+
+					if i.FunctionCall != nil {
+						marshalAny(i.FunctionCall)
+					}
+
+					if i.ToolCalls != nil {
+						marshalAny(i.ToolCalls)
+					}
+				}
+				// Special Handling: System. We care if it was printed at all, not the r branch, so check separately
+				if contentExists && role == "system" {
+					suppressConfigSystemPrompt = true
+				}
+			}
+
+			mess = append(mess, content)
+		}
+
+		predInput = strings.Join(mess, "\n")
+
+		log.Debug().Msgf("Prompt (before templating): %s", predInput)
+
+		templateFile := ""
+		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+		if oais.ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", bc.Model)) {
+			templateFile = bc.Model
+		}
+
+		if bc.TemplateConfig.Chat != "" && !processFunctions {
+			templateFile = bc.TemplateConfig.Chat
+		}
+
+		if bc.TemplateConfig.Functions != "" && processFunctions {
+			templateFile = bc.TemplateConfig.Functions
+		}
+
+		if templateFile != "" {
+			templatedInput, err := oais.ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
+				SystemPrompt:         bc.SystemPrompt,
+				SuppressSystemPrompt: suppressConfigSystemPrompt,
+				Input:                predInput,
+				Functions:            funcs,
+			})
+			if err == nil {
+				predInput = templatedInput
+				log.Debug().Msgf("Template found, input modified to: %s", predInput)
+			} else {
+				log.Debug().Msgf("Template failed loading: %s", err.Error())
+			}
+		}
+	}
+	log.Debug().Msgf("Prompt (after templating): %s", predInput)
+	if processFunctions {
+		log.Debug().Msgf("Grammar: %+v", bc.Grammar)
+	}
+
+	rawFinalResultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+	var rawCompletionsChannel chan concurrency.ErrorOr[*schema.OpenAIResponse]
+	var rawTokenChannel chan concurrency.ErrorOr[*schema.OpenAIResponse]
+	if notifyOnPromptResult {
+		rawCompletionsChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+	}
+	if notifyOnToken {
+		rawTokenChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+	}
+
+	rawResultChannel, individualCompletionChannels, tokenChannels, err := oais.llmbs.GenerateText(predInput, request, bc, func(resp *backend.LLMResponse) schema.Choice {
+		return schema.Choice{
+			Index:        0, // ???
+			FinishReason: "stop",
+			Message: &schema.Message{
+				Role:    "assistant",
+				Content: resp.Response,
+			},
+		}
+	}, notifyOnPromptResult, notifyOnToken)
+
+	chatSimpleMappingFn := func(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] {
+		if resp.Error != nil || resp.Value == nil {
+			return concurrency.ErrorOr[*schema.OpenAIResponse]{Error: resp.Error}
+		}
+		return concurrency.ErrorOr[*schema.OpenAIResponse]{
+			Value: &schema.OpenAIResponse{
+				ID:      traceID.ID,
+				Created: traceID.Created,
+				Model:   request.Model, // we have to return what the user sent here, due to OpenAI spec.
+				Choices: []schema.Choice{
+					{
+						Delta: &schema.Message{
+							Role:    "assistant",
+							Content: resp.Value.Response,
+						},
+						Index: 0,
+					},
+				},
+				Object: "chat.completion.chunk",
+				Usage: schema.OpenAIUsage{
+					PromptTokens:     resp.Value.Usage.Prompt,
+					CompletionTokens: resp.Value.Usage.Completion,
+					TotalTokens:      resp.Value.Usage.Prompt + resp.Value.Usage.Completion,
+				},
+			},
+		}
+	}
+
+	if notifyOnPromptResult {
+		concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(individualCompletionChannels, chatSimpleMappingFn), rawCompletionsChannel, true)
+	}
+	if notifyOnToken {
+		concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(tokenChannels, chatSimpleMappingFn), rawTokenChannel, true)
+	}
+
+	go func() {
+		rawResult := <-rawResultChannel
+		if rawResult.Error != nil {
+			log.Warn().Msgf("OpenAIService::processTools GenerateText error [DEBUG THIS?] %q", rawResult.Error)
+			return
+		}
+		llmResponseChoices := rawResult.Value.Response
+
+		if processFunctions && len(llmResponseChoices) > 1 {
+			log.Warn().Msgf("chat functions response with %d choices in response, debug this?", len(llmResponseChoices))
+			log.Debug().Msgf("%+v", llmResponseChoices)
+		}
+
+		for _, result := range rawResult.Value.Response {
+			// If no functions, just return the raw result.
+			if !processFunctions {
+
+				resp := schema.OpenAIResponse{
+					ID:      traceID.ID,
+					Created: traceID.Created,
+					Model:   request.Model, // we have to return what the user sent here, due to OpenAI spec.
+					Choices: []schema.Choice{result},
+					Object:  "chat.completion.chunk",
+					Usage: schema.OpenAIUsage{
+						PromptTokens:     rawResult.Value.Usage.Prompt,
+						CompletionTokens: rawResult.Value.Usage.Completion,
+						TotalTokens:      rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion,
+					},
+				}
+
+				rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &resp}
+
+				continue
+			}
+			// At this point, things are function specific!
+
+			// Oh no this can't be the right way to do this... but it works. Save us, mudler!
+			fString := fmt.Sprintf("%s", result.Message.Content)
+			results := parseFunctionCall(fString, bc.FunctionsConfig.ParallelCalls)
+			noActionToRun := (len(results) > 0 && results[0].name == noActionName)
+
+			if noActionToRun {
+				log.Debug().Msg("-- noActionToRun branch --")
+				initialMessage := schema.OpenAIResponse{
+					ID:      traceID.ID,
+					Created: traceID.Created,
+					Model:   request.Model, // we have to return what the user sent here, due to OpenAI spec.
+					Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: ""}}},
+					Object:  "stop",
+				}
+				rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &initialMessage}
+
+				result, err := oais.handleQuestion(bc, request, results[0].arguments, predInput)
+				if err != nil {
+					log.Error().Msgf("error handling question: %s", err.Error())
+					return
+				}
+
+				resp := schema.OpenAIResponse{
+					ID:      traceID.ID,
+					Created: traceID.Created,
+					Model:   request.Model, // we have to return what the user sent here, due to OpenAI spec.
+					Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}},
+					Object:  "chat.completion.chunk",
+					Usage: schema.OpenAIUsage{
+						PromptTokens:     rawResult.Value.Usage.Prompt,
+						CompletionTokens: rawResult.Value.Usage.Completion,
+						TotalTokens:      rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion,
+					},
+				}
+
+				rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &resp}
+
+			} else {
+				log.Debug().Msgf("[GenerateFromMultipleMessagesChatRequest] fnResultsBranch: %+v", results)
+				for i, ss := range results {
+					name, args := ss.name, ss.arguments
+
+					initialMessage := schema.OpenAIResponse{
+						ID:      traceID.ID,
+						Created: traceID.Created,
+						Model:   request.Model, // we have to return what the user sent here, due to OpenAI spec.
+						Choices: []schema.Choice{{
+							FinishReason: "function_call",
+							Message: &schema.Message{
+								Role: "assistant",
+								ToolCalls: []schema.ToolCall{
+									{
+										Index: i,
+										ID:    traceID.ID,
+										Type:  "function",
+										FunctionCall: schema.FunctionCall{
+											Name:      name,
+											Arguments: args,
+										},
+									},
+								},
+							}}},
+						Object: "chat.completion.chunk",
+					}
+					rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &initialMessage}
+				}
+			}
+		}
+
+		close(rawFinalResultChannel)
+	}()
+
+	finalResultChannel = rawFinalResultChannel
+	completionsChannel = rawCompletionsChannel
+	tokenChannel = rawTokenChannel
+	return
+}
+
+func (oais *OpenAIService) handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, args, prompt string) (string, error) {
+	log.Debug().Msgf("[handleQuestion called] nothing to do, computing a reply")
+
+	// If there is a message that the LLM already sends as part of the JSON reply, use it
+	arguments := map[string]interface{}{}
+	json.Unmarshal([]byte(args), &arguments)
+	m, exists := arguments["message"]
+	if exists {
+		switch message := m.(type) {
+		case string:
+			if message != "" {
+				log.Debug().Msgf("Reply received from LLM: %s", message)
+				message = oais.llmbs.Finetune(*config, prompt, message)
+				log.Debug().Msgf("Reply received from LLM(finetuned): %s", message)
+
+				return message, nil
+			}
+		}
+	}
+
+	log.Debug().Msgf("No action received from LLM, without a message, computing a reply")
+	// Otherwise ask the LLM to understand the JSON output and the context, and return a message
+	// Note: This costs (in term of CPU/GPU) another computation
+	config.Grammar = ""
+	images := []string{}
+	for _, m := range input.Messages {
+		images = append(images, m.StringImages...)
+	}
+
+	resultChannel, _, err := oais.llmbs.Inference(input.Context, &backend.LLMRequest{
+		Text:        prompt,
+		Images:      images,
+		RawMessages: input.Messages, // Experimental
+	}, config, false)
+
+	if err != nil {
+		log.Error().Msgf("inference setup error: %s", err.Error())
+		return "", err
+	}
+
+	raw := <-resultChannel
+	if raw.Error != nil {
+		log.Error().Msgf("inference error: %q", raw.Error.Error())
+		return "", raw.Error
+	}
+	if raw.Value == nil {
+		log.Warn().Msgf("nil inference response")
+		return "", nil
+	}
+	return oais.llmbs.Finetune(*config, prompt, raw.Value.Response), nil
+}
+
+type funcCallResults struct {
+	name      string
+	arguments string
+}
+
+func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults {
+
+	results := []funcCallResults{}
+
+	// TODO: use generics to avoid this code duplication
+	if multipleResults {
+		ss := []map[string]interface{}{}
+		s := utils.EscapeNewLines(llmresult)
+		json.Unmarshal([]byte(s), &ss)
+
+		for _, s := range ss {
+			func_name, ok := s["function"]
+			if !ok {
+				continue
+			}
+			args, ok := s["arguments"]
+			if !ok {
+				continue
+			}
+			d, _ := json.Marshal(args)
+			funcName, ok := func_name.(string)
+			if !ok {
+				continue
+			}
+			results = append(results, funcCallResults{name: funcName, arguments: string(d)})
+		}
+	} else {
+		// As we have to change the result before processing, we can't stream the answer token-by-token (yet?)
+		ss := map[string]interface{}{}
+		// This prevents newlines from breaking JSON parsing for clients
+		// s := utils.EscapeNewLines(llmresult)
+		json.Unmarshal([]byte(llmresult), &ss)
+
+		// The grammar defines the function name as "function", while OpenAI returns "name"
+		func_name, ok := ss["function"]
+		if !ok {
+			log.Debug().Msg("ss[function] is not OK!")
+			return results
+		}
+		// Similarly, while here arguments is a map[string]interface{}, OpenAI actually wants a stringified object
+		args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
+		if !ok {
+			log.Debug().Msg("ss[arguments] is not OK!")
+			return results
+		}
+		d, _ := json.Marshal(args)
+		funcName, ok := func_name.(string)
+		if !ok {
+			log.Debug().Msgf("unexpected func_name: %+v", func_name)
+			return results
+		}
+		results = append(results, funcCallResults{name: funcName, arguments: string(d)})
+	}
+	return results
+}
diff --git a/core/startup/startup.go b/core/startup/startup.go
index 6298f034..92ccaa9d 100644
--- a/core/startup/startup.go
+++ b/core/startup/startup.go
@@ -4,17 +4,21 @@ import (
 	"fmt"
 	"os"
 
+	"github.com/go-skynet/LocalAI/core"
+	"github.com/go-skynet/LocalAI/core/backend"
 	"github.com/go-skynet/LocalAI/core/config"
+	openaiendpoint "github.com/go-skynet/LocalAI/core/http/endpoints/openai" // TODO: This is dubious. Fix this when splitting assistant api up.
 	"github.com/go-skynet/LocalAI/core/services"
 	"github.com/go-skynet/LocalAI/internal"
 	"github.com/go-skynet/LocalAI/pkg/assets"
 	"github.com/go-skynet/LocalAI/pkg/model"
-	pkgStartup "github.com/go-skynet/LocalAI/pkg/startup"
+	"github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/rs/zerolog"
 	"github.com/rs/zerolog/log"
 )
 
-func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) {
+// (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) {
+func Startup(opts ...config.AppOption) (*core.Application, error) {
 	options := config.NewApplicationConfig(opts...)
zerolog.SetGlobalLevel(zerolog.InfoLevel) @@ -27,68 +31,75 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode // Make sure directories exists if options.ModelPath == "" { - return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty") + return nil, fmt.Errorf("options.ModelPath cannot be empty") } err := os.MkdirAll(options.ModelPath, 0755) if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err) + return nil, fmt.Errorf("unable to create ModelPath: %q", err) } if options.ImageDir != "" { err := os.MkdirAll(options.ImageDir, 0755) if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err) + return nil, fmt.Errorf("unable to create ImageDir: %q", err) } } if options.AudioDir != "" { err := os.MkdirAll(options.AudioDir, 0755) if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err) + return nil, fmt.Errorf("unable to create AudioDir: %q", err) } } if options.UploadDir != "" { err := os.MkdirAll(options.UploadDir, 0755) if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err) + return nil, fmt.Errorf("unable to create UploadDir: %q", err) + } + } + if options.ConfigsDir != "" { + err := os.MkdirAll(options.ConfigsDir, 0755) + if err != nil { + return nil, fmt.Errorf("unable to create ConfigsDir: %q", err) } } - // - pkgStartup.PreloadModelsConfigurations(options.ModelLibraryURL, options.ModelPath, options.ModelsURL...) + // Load config jsons + utils.LoadConfig(options.UploadDir, openaiendpoint.UploadedFilesFile, &openaiendpoint.UploadedFiles) + utils.LoadConfig(options.ConfigsDir, openaiendpoint.AssistantsConfigFile, &openaiendpoint.Assistants) + utils.LoadConfig(options.ConfigsDir, openaiendpoint.AssistantsFileConfigFile, &openaiendpoint.AssistantFiles) - cl := config.NewBackendConfigLoader() - ml := model.NewModelLoader(options.ModelPath) + app := createApplication(options) - configLoaderOpts := options.ToConfigLoaderOptions() + services.PreloadModelsConfigurations(options.ModelLibraryURL, options.ModelPath, options.ModelsURL...) 
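Since Startup now returns a single *core.Application instead of three separate values, call sites migrate roughly as sketched here; the wrapper is a hypothetical caller, not part of this diff:

	func runSketch(opts ...config.AppOption) (*core.Application, error) {
		app, err := startup.Startup(opts...)
		if err != nil {
			return nil, err
		}
		// what used to be separate return values now hangs off the Application struct
		_, _ = app.BackendConfigLoader, app.ModelLoader
		return app, nil
	}
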
- if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil { + if err := app.BackendConfigLoader.LoadBackendConfigsFromPath(options.ModelPath, app.ApplicationConfig.ToConfigLoaderOptions()...); err != nil { log.Error().Err(err).Msg("error loading config files") } if options.ConfigFile != "" { - if err := cl.LoadBackendConfigFile(options.ConfigFile, configLoaderOpts...); err != nil { + if err := app.BackendConfigLoader.LoadBackendConfigFile(options.ConfigFile, app.ApplicationConfig.ToConfigLoaderOptions()...); err != nil { log.Error().Err(err).Msg("error loading config file") } } - if err := cl.Preload(options.ModelPath); err != nil { + if err := app.BackendConfigLoader.Preload(options.ModelPath); err != nil { log.Error().Err(err).Msg("error downloading models") } if options.PreloadJSONModels != "" { - if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, cl, options.Galleries); err != nil { - return nil, nil, nil, err + if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, app.BackendConfigLoader, options.Galleries); err != nil { + return nil, err } } if options.PreloadModelsFromPath != "" { - if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, cl, options.Galleries); err != nil { - return nil, nil, nil, err + if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, app.BackendConfigLoader, options.Galleries); err != nil { + return nil, err } } if options.Debug { - for _, v := range cl.ListBackendConfigs() { - cfg, _ := cl.GetBackendConfig(v) + for _, v := range app.BackendConfigLoader.ListBackendConfigs() { + cfg, _ := app.BackendConfigLoader.GetBackendConfig(v) log.Debug().Msgf("Model: %s (config: %+v)", v, cfg) } } @@ -106,17 +117,17 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode go func() { <-options.Context.Done() log.Debug().Msgf("Context canceled, shutting down") - ml.StopAllGRPC() + app.ModelLoader.StopAllGRPC() }() if options.WatchDog { wd := model.NewWatchDog( - ml, + app.ModelLoader, options.WatchDogBusyTimeout, options.WatchDogIdleTimeout, options.WatchDogBusy, options.WatchDogIdle) - ml.SetWatchDog(wd) + app.ModelLoader.SetWatchDog(wd) go wd.Run() go func() { <-options.Context.Done() @@ -126,5 +137,35 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode } log.Info().Msg("core/startup process completed!") - return cl, ml, options, nil + return app, nil +} + +// In Lieu of a proper DI framework, this function wires up the Application manually. +// This is in core/startup rather than core/state.go to keep package references clean! 
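One practical consequence of this manual wiring, assuming the Application fields stay exported as declared in core/state.go: a test can rebuild only the pieces it needs instead of running the full Startup path. A hypothetical helper, not part of the patch:

	func newTestApplication(cfg *config.ApplicationConfig) *core.Application {
		app := &core.Application{
			ApplicationConfig:   cfg,
			BackendConfigLoader: config.NewBackendConfigLoader(),
			ModelLoader:         model.NewModelLoader(cfg.ModelPath),
		}
		// wire only what the test exercises; order matters where services depend on each other
		app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, cfg)
		app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, cfg, app.LLMBackendService)
		return app
	}
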
+func createApplication(appConfig *config.ApplicationConfig) *core.Application { + app := &core.Application{ + ApplicationConfig: appConfig, + BackendConfigLoader: config.NewBackendConfigLoader(), + ModelLoader: model.NewModelLoader(appConfig.ModelPath), + } + + var err error + + app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + + app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + app.GalleryService = services.NewGalleryService(app.ApplicationConfig.ModelPath) + app.ListModelsService = services.NewListModelsService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService) + + app.LocalAIMetricsService, err = services.NewLocalAIMetricsService() + if err != nil { + log.Warn().Msg("Unable to initialize LocalAIMetricsService - non-fatal, optional service") + } + + return app } diff --git a/core/state.go b/core/state.go new file mode 100644 index 00000000..cf0d614b --- /dev/null +++ b/core/state.go @@ -0,0 +1,41 @@ +package core + +import ( + "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/services" + "github.com/go-skynet/LocalAI/pkg/model" +) + +// TODO: Can I come up with a better name or location for this? +// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy +// Perhaps a proper DI system is worth it in the future, but for now keep things simple. +type Application struct { + + // Application-Level Config + ApplicationConfig *config.ApplicationConfig + // ApplicationState *ApplicationState + + // Core Low-Level Services + BackendConfigLoader *config.BackendConfigLoader + ModelLoader *model.ModelLoader + + // Backend Services + EmbeddingsBackendService *backend.EmbeddingsBackendService + ImageGenerationBackendService *backend.ImageGenerationBackendService + LLMBackendService *backend.LLMBackendService + TranscriptionBackendService *backend.TranscriptionBackendService + TextToSpeechBackendService *backend.TextToSpeechBackendService + + // LocalAI System Services + BackendMonitorService *services.BackendMonitorService + GalleryService *services.GalleryService + ListModelsService *services.ListModelsService + LocalAIMetricsService *services.LocalAIMetricsService + OpenAIService *services.OpenAIService +} + +// TODO [NEXT PR?]: Break up ApplicationConfig. 
+// Migrate over stuff that is not set via config at all - especially runtime stuff +type ApplicationState struct { +} diff --git a/examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru b/examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru new file mode 100644 index 00000000..c33bafe1 --- /dev/null +++ b/examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru @@ -0,0 +1,25 @@ +meta { + name: -completions Stream + type: http + seq: 4 +} + +post { + url: {{PROTOCOL}}{{HOST}}:{{PORT}}/completions + body: json + auth: none +} + +headers { + Content-Type: application/json +} + +body:json { + { + "model": "{{DEFAULT_MODEL}}", + "prompt": "function downloadFile(string url, string outputPath) {", + "max_tokens": 256, + "temperature": 0.5, + "stream": true + } +} diff --git a/pkg/concurrency/concurrency.go b/pkg/concurrency/concurrency.go new file mode 100644 index 00000000..324e8cc5 --- /dev/null +++ b/pkg/concurrency/concurrency.go @@ -0,0 +1,135 @@ +package concurrency + +import ( + "sync" +) + +// TODO: closeWhenDone bool parameter :: +// It currently is experimental, and therefore exists. +// Is there ever a situation to use false? + +// This function is used to merge the results of a slice of channels of a specific result type down to a single result channel of a second type. +// mappingFn allows the caller to convert from the input type to the output type +// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use. +// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes. +func SliceOfChannelsRawMerger[IndividualResultType any, OutputResultType any](individualResultChannels []<-chan IndividualResultType, outputChannel chan<- OutputResultType, mappingFn func(IndividualResultType) (OutputResultType, error), closeWhenDone bool) *sync.WaitGroup { + var wg sync.WaitGroup + wg.Add(len(individualResultChannels)) + mergingFn := func(c <-chan IndividualResultType) { + for r := range c { + mr, err := mappingFn(r) + if err == nil { + outputChannel <- mr + } + } + wg.Done() + } + for _, irc := range individualResultChannels { + go mergingFn(irc) + } + if closeWhenDone { + go func() { + wg.Wait() + close(outputChannel) + }() + } + + return &wg +} + +// This function is used to merge the results of a slice of channels of a specific result type down to a single result channel of THE SAME TYPE. +// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use. +// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes. +func SliceOfChannelsRawMergerWithoutMapping[ResultType any](individualResultsChannels []<-chan ResultType, outputChannel chan<- ResultType, closeWhenDone bool) *sync.WaitGroup { + return SliceOfChannelsRawMerger(individualResultsChannels, outputChannel, func(v ResultType) (ResultType, error) { return v, nil }, closeWhenDone) +} + +// This function is used to merge the results of a slice of channels of a specific result type down to a single succcess result channel of a second type, and an error channel +// mappingFn allows the caller to convert from the input type to the output type +// This variant is designed to be aware of concurrency.ErrorOr[T], splitting successes from failures. 
+// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use. +// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes. +func SliceOfChannelsMergerWithErrors[IndividualResultType any, OutputResultType any](individualResultChannels []<-chan ErrorOr[IndividualResultType], successChannel chan<- OutputResultType, errorChannel chan<- error, mappingFn func(IndividualResultType) (OutputResultType, error), closeWhenDone bool) *sync.WaitGroup { + var wg sync.WaitGroup + wg.Add(len(individualResultChannels)) + mergingFn := func(c <-chan ErrorOr[IndividualResultType]) { + for r := range c { + if r.Error != nil { + errorChannel <- r.Error + } else { + mv, err := mappingFn(r.Value) + if err != nil { + errorChannel <- err + } else { + successChannel <- mv + } + } + } + wg.Done() + } + for _, irc := range individualResultChannels { + go mergingFn(irc) + } + if closeWhenDone { + go func() { + wg.Wait() + close(successChannel) + close(errorChannel) + }() + } + return &wg +} + +// This function is used to reduce down the results of a slice of channels of a specific result type down to a single result value of a second type. +// reducerFn allows the caller to convert from the input type to the output type +// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use. +// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes. +func SliceOfChannelsReducer[InputResultType any, OutputResultType any](individualResultsChannels []<-chan InputResultType, outputChannel chan<- OutputResultType, + reducerFn func(iv InputResultType, ov OutputResultType) OutputResultType, initialValue OutputResultType, closeWhenDone bool) (wg *sync.WaitGroup) { + wg = &sync.WaitGroup{} + wg.Add(len(individualResultsChannels)) + reduceLock := sync.Mutex{} + reducingFn := func(c <-chan InputResultType) { + for iv := range c { + reduceLock.Lock() + initialValue = reducerFn(iv, initialValue) + reduceLock.Unlock() + } + wg.Done() + } + for _, irc := range individualResultsChannels { + go reducingFn(irc) + } + go func() { + wg.Wait() + outputChannel <- initialValue + if closeWhenDone { + close(outputChannel) + } + }() + return wg +} + +// This function is primarily designed to be used in combination with the above utility functions. +// A slice of input result channels of a specific type is provided, along with a function to map those values to another type +// A slice of output result channels is returned, where each value is mapped as it comes in. +// The order of the slice will be retained. 
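+//
+// A minimal usage sketch (hypothetical int-to-string mapping):
+//
+//	ins := []<-chan int{c1, c2}
+//	outs := SliceOfChannelsTransformer(ins, func(v int) string { return fmt.Sprintf("$%d", v) })
+//	// outs[0] emits mapped values from c1, outs[1] from c2, preserving order.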
+func SliceOfChannelsTransformer[InputResultType any, OutputResultType any](inputChanels []<-chan InputResultType, mappingFn func(v InputResultType) OutputResultType) (outputChannels []<-chan OutputResultType) { + rawOutputChannels := make([]<-chan OutputResultType, len(inputChanels)) + + transformingFn := func(ic <-chan InputResultType, oc chan OutputResultType) { + for iv := range ic { + oc <- mappingFn(iv) + } + close(oc) + } + + for ci, c := range inputChanels { + roc := make(chan OutputResultType) + go transformingFn(c, roc) + rawOutputChannels[ci] = roc + } + + outputChannels = rawOutputChannels + return +} diff --git a/pkg/concurrency/concurrency_test.go b/pkg/concurrency/concurrency_test.go new file mode 100644 index 00000000..fedd74be --- /dev/null +++ b/pkg/concurrency/concurrency_test.go @@ -0,0 +1,101 @@ +package concurrency_test + +// TODO: noramlly, these go in utils_tests, right? Why does this cause problems only in pkg/utils? + +import ( + "fmt" + "slices" + + . "github.com/go-skynet/LocalAI/pkg/concurrency" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("utils/concurrency tests", func() { + It("SliceOfChannelsReducer works", func() { + individualResultsChannels := []<-chan int{} + initialValue := 0 + for i := 0; i < 3; i++ { + c := make(chan int) + go func(i int, c chan int) { + for ii := 1; ii < 4; ii++ { + c <- (i * ii) + } + close(c) + }(i, c) + individualResultsChannels = append(individualResultsChannels, c) + } + Expect(len(individualResultsChannels)).To(Equal(3)) + finalResultChannel := make(chan int) + wg := SliceOfChannelsReducer[int, int](individualResultsChannels, finalResultChannel, func(input int, val int) int { + return val + input + }, initialValue, true) + + Expect(wg).ToNot(BeNil()) + + result := <-finalResultChannel + + Expect(result).ToNot(Equal(0)) + Expect(result).To(Equal(18)) + }) + + It("SliceOfChannelsRawMergerWithoutMapping works", func() { + individualResultsChannels := []<-chan int{} + for i := 0; i < 3; i++ { + c := make(chan int) + go func(i int, c chan int) { + for ii := 1; ii < 4; ii++ { + c <- (i * ii) + } + close(c) + }(i, c) + individualResultsChannels = append(individualResultsChannels, c) + } + Expect(len(individualResultsChannels)).To(Equal(3)) + outputChannel := make(chan int) + wg := SliceOfChannelsRawMergerWithoutMapping(individualResultsChannels, outputChannel, true) + Expect(wg).ToNot(BeNil()) + outputSlice := []int{} + for v := range outputChannel { + outputSlice = append(outputSlice, v) + } + Expect(len(outputSlice)).To(Equal(9)) + slices.Sort(outputSlice) + Expect(outputSlice[0]).To(BeZero()) + Expect(outputSlice[3]).To(Equal(1)) + Expect(outputSlice[8]).To(Equal(6)) + }) + + It("SliceOfChannelsTransformer works", func() { + individualResultsChannels := []<-chan int{} + for i := 0; i < 3; i++ { + c := make(chan int) + go func(i int, c chan int) { + for ii := 1; ii < 4; ii++ { + c <- (i * ii) + } + close(c) + }(i, c) + individualResultsChannels = append(individualResultsChannels, c) + } + Expect(len(individualResultsChannels)).To(Equal(3)) + mappingFn := func(i int) string { + return fmt.Sprintf("$%d", i) + } + + outputChannels := SliceOfChannelsTransformer(individualResultsChannels, mappingFn) + Expect(len(outputChannels)).To(Equal(3)) + rSlice := []string{} + for ii := 1; ii < 4; ii++ { + for i := 0; i < 3; i++ { + res := <-outputChannels[i] + rSlice = append(rSlice, res) + } + } + slices.Sort(rSlice) + Expect(rSlice[0]).To(Equal("$0")) + Expect(rSlice[3]).To(Equal("$1")) + 
Expect(rSlice[8]).To(Equal("$6")) + }) +}) diff --git a/pkg/concurrency/types.go b/pkg/concurrency/types.go new file mode 100644 index 00000000..76081ba3 --- /dev/null +++ b/pkg/concurrency/types.go @@ -0,0 +1,6 @@ +package concurrency + +type ErrorOr[T any] struct { + Value T + Error error +} diff --git a/pkg/grpc/backend.go b/pkg/grpc/backend.go index 8fb8c39d..49a6b1bd 100644 --- a/pkg/grpc/backend.go +++ b/pkg/grpc/backend.go @@ -41,7 +41,7 @@ type Backend interface { PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) - AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) + AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error) Status(ctx context.Context) (*pb.StatusResponse, error) diff --git a/pkg/grpc/base/base.go b/pkg/grpc/base/base.go index 0af5d94f..c0b4bc34 100644 --- a/pkg/grpc/base/base.go +++ b/pkg/grpc/base/base.go @@ -53,8 +53,8 @@ func (llm *Base) GenerateImage(*pb.GenerateImageRequest) error { return fmt.Errorf("unimplemented") } -func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.Result, error) { - return schema.Result{}, fmt.Errorf("unimplemented") +func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error) { + return schema.TranscriptionResult{}, fmt.Errorf("unimplemented") } func (llm *Base) TTS(*pb.TTSRequest) error { diff --git a/pkg/grpc/client.go b/pkg/grpc/client.go index 882db12a..0e0e56c7 100644 --- a/pkg/grpc/client.go +++ b/pkg/grpc/client.go @@ -210,7 +210,7 @@ func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOp return client.TTS(ctx, in, opts...) } -func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) { +func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) { if !c.parallel { c.opMutex.Lock() defer c.opMutex.Unlock() @@ -231,7 +231,7 @@ func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptReques if err != nil { return nil, err } - tresult := &schema.Result{} + tresult := &schema.TranscriptionResult{} for _, s := range res.Segments { tks := []int{} for _, t := range s.Tokens { diff --git a/pkg/grpc/embed.go b/pkg/grpc/embed.go index 73b185a3..b4ba4884 100644 --- a/pkg/grpc/embed.go +++ b/pkg/grpc/embed.go @@ -53,12 +53,12 @@ func (e *embedBackend) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc. 
return e.s.TTS(ctx, in) } -func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) { +func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) { r, err := e.s.AudioTranscription(ctx, in) if err != nil { return nil, err } - tr := &schema.Result{} + tr := &schema.TranscriptionResult{} for _, s := range r.Segments { var tks []int for _, t := range s.Tokens { diff --git a/pkg/grpc/interface.go b/pkg/grpc/interface.go index 4d06544d..aa7a3fbc 100644 --- a/pkg/grpc/interface.go +++ b/pkg/grpc/interface.go @@ -15,7 +15,7 @@ type LLM interface { Load(*pb.ModelOptions) error Embeddings(*pb.PredictOptions) ([]float32, error) GenerateImage(*pb.GenerateImageRequest) error - AudioTranscription(*pb.TranscriptRequest) (schema.Result, error) + AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error) TTS(*pb.TTSRequest) error TokenizeString(*pb.PredictOptions) (pb.TokenizationResponse, error) Status() (pb.StatusResponse, error) diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 5d9808a4..617d8f62 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -81,7 +81,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string if _, err := os.Stat(uri); err == nil { serverAddress, err := getFreeAddress() if err != nil { - return "", fmt.Errorf("failed allocating free ports: %s", err.Error()) + return "", fmt.Errorf("%s failed allocating free ports: %s", backend, err.Error()) } // Make sure the process is executable if err := ml.startProcess(uri, o.model, serverAddress); err != nil { @@ -134,7 +134,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string if !ready { log.Debug().Msgf("GRPC Service NOT ready") - return "", fmt.Errorf("grpc service not ready") + return "", fmt.Errorf("%s grpc service not ready", backend) } options := *o.gRPCOptions @@ -145,10 +145,10 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string res, err := client.GRPC(o.parallelRequests, ml.wd).LoadModel(o.context, &options) if err != nil { - return "", fmt.Errorf("could not load model: %w", err) + return "", fmt.Errorf("\"%s\" could not load model: %w", backend, err) } if !res.Success { - return "", fmt.Errorf("could not load model (no success): %s", res.Message) + return "", fmt.Errorf("\"%s\" could not load model (no success): %s", backend, res.Message) } return client, nil diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go deleted file mode 100644 index b09516a7..00000000 --- a/pkg/startup/model_preload.go +++ /dev/null @@ -1,85 +0,0 @@ -package startup - -import ( - "errors" - "os" - "path/filepath" - - "github.com/go-skynet/LocalAI/embedded" - "github.com/go-skynet/LocalAI/pkg/downloader" - "github.com/go-skynet/LocalAI/pkg/utils" - "github.com/rs/zerolog/log" -) - -// PreloadModelsConfigurations will preload models from the given list of URLs -// It will download the model if it is not already present in the model path -// It will also try to resolve if the model is an embedded model YAML configuration -func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, models ...string) { - for _, url := range models { - - // As a best effort, try to resolve the model from the remote library - // if it's not resolved we try with the other method below - if modelLibraryURL != "" { - lib, 
err := embedded.GetRemoteLibraryShorteners(modelLibraryURL) - if err == nil { - if lib[url] != "" { - log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url]) - url = lib[url] - } - } - } - - url = embedded.ModelShortURL(url) - switch { - case embedded.ExistsInModelsLibrary(url): - modelYAML, err := embedded.ResolveContent(url) - // If we resolve something, just save it to disk and continue - if err != nil { - log.Error().Err(err).Msg("error resolving model content") - continue - } - - log.Debug().Msgf("[startup] resolved embedded model: %s", url) - md5Name := utils.MD5(url) - modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml" - if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil { - log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition") - } - case downloader.LooksLikeURL(url): - log.Debug().Msgf("[startup] resolved model to download: %s", url) - - // md5 of model name - md5Name := utils.MD5(url) - - // check if file exists - if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) { - modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml" - err := downloader.DownloadFile(url, modelDefinitionFilePath, "", func(fileName, current, total string, percent float64) { - utils.DisplayDownloadFunction(fileName, current, total, percent) - }) - if err != nil { - log.Error().Err(err).Str("url", url).Str("filepath", modelDefinitionFilePath).Msg("error downloading model") - } - } - default: - if _, err := os.Stat(url); err == nil { - log.Debug().Msgf("[startup] resolved local model: %s", url) - // copy to modelPath - md5Name := utils.MD5(url) - - modelYAML, err := os.ReadFile(url) - if err != nil { - log.Error().Err(err).Str("filepath", url).Msg("error reading model definition") - continue - } - - modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml" - if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil { - log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error loading model: %s") - } - } else { - log.Warn().Msgf("[startup] failed resolving model '%s'", url) - } - } - } -} diff --git a/pkg/utils/base64.go b/pkg/utils/base64.go new file mode 100644 index 00000000..769d8a88 --- /dev/null +++ b/pkg/utils/base64.go @@ -0,0 +1,50 @@ +package utils + +import ( + "encoding/base64" + "fmt" + "io" + "net/http" + "strings" + "time" +) + +var base64DownloadClient http.Client = http.Client{ + Timeout: 30 * time.Second, +} + +// this function check if the string is an URL, if it's an URL downloads the image in memory +// encodes it in base64 and returns the base64 string + +// This may look weird down in pkg/utils while it is currently only used in core/config +// +// but I believe it may be useful for MQTT as well in the near future, so I'm +// extracting it while I'm thinking of it. 
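+//
+// Usage sketch (hypothetical inputs):
+//
+//	b64, err := GetImageURLAsBase64("https://example.com/cat.jpg")       // fetched, then base64-encoded
+//	b64, err = GetImageURLAsBase64("data:image/jpeg;base64,/9j/4AAQ...") // data-URI prefix stripped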
+func GetImageURLAsBase64(s string) (string, error) { + if strings.HasPrefix(s, "http") { + // download the image + resp, err := base64DownloadClient.Get(s) + if err != nil { + return "", err + } + defer resp.Body.Close() + + // read the image data into memory + data, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + + // encode the image data in base64 + encoded := base64.StdEncoding.EncodeToString(data) + + // return the base64 string + return encoded, nil + } + + // if the string instead is prefixed with "data:image/jpeg;base64,", drop it + if strings.HasPrefix(s, "data:image/jpeg;base64,") { + return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil + } + return "", fmt.Errorf("not valid string") +} From f1f39eea3fd915e8ccc29ad8fa9d20c003ef8ed3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 13 Apr 2024 09:47:33 +0200 Subject: [PATCH 0293/2895] Create localaibot_automerge.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/localaibot_automerge.yml | 42 ++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .github/workflows/localaibot_automerge.yml diff --git a/.github/workflows/localaibot_automerge.yml b/.github/workflows/localaibot_automerge.yml new file mode 100644 index 00000000..a540997b --- /dev/null +++ b/.github/workflows/localaibot_automerge.yml @@ -0,0 +1,42 @@ +name: Dependabot auto-merge +on: +- pull_request_target + +permissions: + contents: write + pull-requests: write + packages: read + +jobs: + dependabot: + runs-on: ubuntu-latest + if: ${{ github.actor == 'localai-bot' }} + steps: + - name: Dependabot metadata + id: metadata + uses: dependabot/fetch-metadata@v1.3.4 + with: + github-token: "${{ secrets.GITHUB_TOKEN }}" + skip-commit-verification: true + + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Approve a PR if not already approved + run: | + gh pr checkout "$PR_URL" + if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ]; + then + gh pr review --approve "$PR_URL" + else + echo "PR already approved."; + fi + env: + PR_URL: ${{github.event.pull_request.html_url}} + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} + + - name: Enable auto-merge for LocalAIBot PRs + run: gh pr merge --auto --squash "$PR_URL" + env: + PR_URL: ${{github.event.pull_request.html_url}} + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} From 95244ed6e7598db09fa8974052f550bb1dcc9d8e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 13 Apr 2024 10:03:15 +0200 Subject: [PATCH 0294/2895] Update localaibot_automerge.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/localaibot_automerge.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/localaibot_automerge.yml b/.github/workflows/localaibot_automerge.yml index a540997b..98629ab7 100644 --- a/.github/workflows/localaibot_automerge.yml +++ b/.github/workflows/localaibot_automerge.yml @@ -1,4 +1,4 @@ -name: Dependabot auto-merge +name: LocalAI-bot auto-merge on: - pull_request_target From 4e74560649b0cb54fd1ab03d3a7a4105e2dd01fd Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 13 Apr 2024 13:30:40 +0200 Subject: [PATCH 0295/2895] ci: fix release pipeline missing dependencies (#2025) --- .github/workflows/release.yaml | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 3c1cea44..33c640cc 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml 
@@ -1,6 +1,8 @@ name: Build and Release -on: push +on: +- push +- pull_request env: GRPC_VERSION: v1.58.0 @@ -40,7 +42,7 @@ jobs: - name: Dependencies run: | sudo apt-get update - sudo apt-get install build-essential ffmpeg + sudo apt-get install build-essential ffmpeg protobuf-compiler - name: Install CUDA Dependencies if: ${{ matrix.build == 'cuda12' || matrix.build == 'cuda11' }} run: | @@ -75,6 +77,9 @@ jobs: CMAKE_ARGS: "${{ matrix.defines }}" BUILD_ID: "${{ matrix.build }}" run: | + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest + go install google.golang.org/protobuf/cmd/protoc-gen-go@latest + export PATH=$PATH:$GOPATH/bin if [ "${{ matrix.build }}" == "cuda12" ] || [ "${{ matrix.build }}" == "cuda11" ]; then export BUILD_TYPE=cublas export PATH=/usr/local/cuda/bin:$PATH @@ -106,9 +111,12 @@ jobs: cache: false - name: Dependencies run: | - sudo apt-get install -y --no-install-recommends libopencv-dev + sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest + go install google.golang.org/protobuf/cmd/protoc-gen-go@latest - name: Build stablediffusion run: | + export PATH=$PATH:$GOPATH/bin make backend-assets/grpc/stablediffusion mkdir -p release && cp backend-assets/grpc/stablediffusion release - uses: actions/upload-artifact@v4 @@ -139,6 +147,8 @@ jobs: - name: Dependencies run: | brew install protobuf grpc + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest + go install google.golang.org/protobuf/cmd/protoc-gen-go@latest - name: Build id: build env: @@ -147,6 +157,7 @@ jobs: run: | export C_INCLUDE_PATH=/usr/local/include export CPLUS_INCLUDE_PATH=/usr/local/include + export PATH=$PATH:$GOPATH/bin make dist - uses: actions/upload-artifact@v4 with: @@ -183,6 +194,8 @@ jobs: - name: Dependencies run: | brew install protobuf grpc + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest + go install google.golang.org/protobuf/cmd/protoc-gen-go@latest - name: Build id: build env: @@ -191,6 +204,7 @@ jobs: run: | export C_INCLUDE_PATH=/usr/local/include export CPLUS_INCLUDE_PATH=/usr/local/include + export PATH=$PATH:$GOPATH/bin make dist - uses: actions/upload-artifact@v4 with: From b91820b7f88173e532af8de509d43dd6191a2386 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 13 Apr 2024 13:46:07 +0200 Subject: [PATCH 0296/2895] Update localaibot_automerge.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/localaibot_automerge.yml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/localaibot_automerge.yml b/.github/workflows/localaibot_automerge.yml index 98629ab7..74c725f3 100644 --- a/.github/workflows/localaibot_automerge.yml +++ b/.github/workflows/localaibot_automerge.yml @@ -12,13 +12,6 @@ jobs: runs-on: ubuntu-latest if: ${{ github.actor == 'localai-bot' }} steps: - - name: Dependabot metadata - id: metadata - uses: dependabot/fetch-metadata@v1.3.4 - with: - github-token: "${{ secrets.GITHUB_TOKEN }}" - skip-commit-verification: true - - name: Checkout repository uses: actions/checkout@v3 From 619f2517a490a1a3448cf5df837a8229b232287a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 13 Apr 2024 15:47:39 +0200 Subject: [PATCH 0297/2895] :arrow_up: Update ggerganov/llama.cpp (#2028) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9f86ef23..1b59c604 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=ab9a3240a9da941fdef5cd4a25f2b97c2f5a67aa +CPPLLAMA_VERSION?=4bd0f93e4ab4fe6682e7d0241c1bdec1397e954a # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 0fdff269241d5ce93f325a48691bf9ebc5b5b9e6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 13 Apr 2024 18:59:21 +0200 Subject: [PATCH 0298/2895] feat(parler-tts): Add new backend (#2027) * feat(parler-tts): Add new backend Signed-off-by: Ettore Di Giacinto * feat(parler-tts): try downgrade protobuf Signed-off-by: Ettore Di Giacinto * feat(parler-tts): add parler conda env Signed-off-by: Ettore Di Giacinto * Revert "feat(parler-tts): try downgrade protobuf" This reverts commit bd5941d5cfc00676b45a99f71debf3c34249cf3c. Signed-off-by: Ettore Di Giacinto * deps: add grpc Signed-off-by: Ettore Di Giacinto * fix: try to gen proto with same environment * workaround * Revert "fix: try to gen proto with same environment" This reverts commit 998c745e2f475ec3ec43ac017bcebf3a7ce15b8b. * Workaround fixup --------- Signed-off-by: Ettore Di Giacinto Co-authored-by: Dave --- .github/workflows/test-extra.yml | 29 ++++ Dockerfile | 5 +- Makefile | 13 +- backend/python/parler-tts/Makefile | 39 ++++++ backend/python/parler-tts/install.sh | 39 ++++++ backend/python/parler-tts/parler-nvidia.yml | 48 +++++++ backend/python/parler-tts/parler.yml | 36 +++++ .../python/parler-tts/parler_tts_server.py | 125 ++++++++++++++++++ backend/python/parler-tts/run.sh | 16 +++ backend/python/parler-tts/test.sh | 11 ++ backend/python/parler-tts/test_parler.py | 81 ++++++++++++ backend/python/transformers-musicgen/run.sh | 2 +- 12 files changed, 440 insertions(+), 4 deletions(-) create mode 100644 backend/python/parler-tts/Makefile create mode 100755 backend/python/parler-tts/install.sh create mode 100644 backend/python/parler-tts/parler-nvidia.yml create mode 100644 backend/python/parler-tts/parler.yml create mode 100644 backend/python/parler-tts/parler_tts_server.py create mode 100644 backend/python/parler-tts/run.sh create mode 100644 backend/python/parler-tts/test.sh create mode 100644 backend/python/parler-tts/test_parler.py diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index 7705783e..fa45cb3c 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -104,6 +104,35 @@ jobs: make --jobs=5 --output-sync=target -C backend/python/diffusers make --jobs=5 --output-sync=target -C backend/python/diffusers test + tests-parler-tts: + runs-on: ubuntu-latest + steps: + - name: Clone + uses: actions/checkout@v4 + with: + submodules: true + - name: Dependencies + run: | + sudo apt-get update + sudo apt-get install build-essential ffmpeg + curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ + sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ + gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ + sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \ + sudo /bin/bash -c 'echo "deb [arch=amd64 
signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ + sudo apt-get update && \ + sudo apt-get install -y conda + sudo apt-get install -y ca-certificates cmake curl patch python3-pip + sudo apt-get install -y libopencv-dev + pip install --user grpcio-tools + + sudo rm -rfv /usr/bin/conda || true + + - name: Test parler-tts + run: | + export PATH=$PATH:/opt/conda/bin + make --jobs=5 --output-sync=target -C backend/python/parler-tts + make --jobs=5 --output-sync=target -C backend/python/parler-tts test tests-transformers-musicgen: runs-on: ubuntu-latest diff --git a/Dockerfile b/Dockerfile index d0217d50..397fbe22 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ ARG TARGETVARIANT ENV BUILD_TYPE=${BUILD_TYPE} ENV DEBIAN_FRONTEND=noninteractive -ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh" +ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" ARG GO_TAGS="stablediffusion tinydream tts" @@ -275,6 +275,9 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ make -C backend/python/transformers-musicgen \ ; fi +RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ + make -C backend/python/parler-tts \ + ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ make -C backend/python/coqui \ ; fi diff --git a/Makefile b/Makefile index 1b59c604..d5bc3739 100644 --- a/Makefile +++ b/Makefile @@ -439,10 +439,10 @@ protogen-go-clean: $(RM) bin/* .PHONY: protogen-python -protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen sentencetransformers-protogen transformers-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen +protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen .PHONY: protogen-python-clean 
-protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean +protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean .PHONY: autogptq-protogen autogptq-protogen: @@ -524,6 +524,14 @@ transformers-protogen: transformers-protogen-clean: $(MAKE) -C backend/python/transformers protogen-clean +.PHONY: parler-tts-protogen +parler-tts-protogen: + $(MAKE) -C backend/python/parler-tts protogen + +.PHONY: parler-tts-protogen-clean +parler-tts-protogen-clean: + $(MAKE) -C backend/python/parler-tts protogen-clean + .PHONY: transformers-musicgen-protogen transformers-musicgen-protogen: $(MAKE) -C backend/python/transformers-musicgen protogen @@ -560,6 +568,7 @@ prepare-extra-conda-environments: protogen-python $(MAKE) -C backend/python/sentencetransformers $(MAKE) -C backend/python/transformers $(MAKE) -C backend/python/transformers-musicgen + $(MAKE) -C backend/python/parler-tts $(MAKE) -C backend/python/vall-e-x $(MAKE) -C backend/python/exllama $(MAKE) -C backend/python/petals diff --git a/backend/python/parler-tts/Makefile b/backend/python/parler-tts/Makefile new file mode 100644 index 00000000..4497762e --- /dev/null +++ b/backend/python/parler-tts/Makefile @@ -0,0 +1,39 @@ +export CONDA_ENV_PATH = "parler.yml" +SKIP_CONDA?=0 +ifeq ($(BUILD_TYPE), cublas) +export CONDA_ENV_PATH = "parler-nvidia.yml" +endif + +# Intel GPU are supposed to have dependencies installed in the main python +# environment, so we skip conda installation for SYCL builds. +# https://github.com/intel/intel-extension-for-pytorch/issues/538 +ifneq (,$(findstring sycl,$(BUILD_TYPE))) +export SKIP_CONDA=1 +endif + +.PHONY: parler-tts +parler-tts: protogen + @echo "Installing $(CONDA_ENV_PATH)..." + bash install.sh $(CONDA_ENV_PATH) + +.PHONY: run +run: protogen + @echo "Running transformers..." + bash run.sh + @echo "transformers run." + +.PHONY: test +test: protogen + @echo "Testing transformers..." + bash test.sh + @echo "transformers tested." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/parler-tts/install.sh b/backend/python/parler-tts/install.sh new file mode 100755 index 00000000..b9965b23 --- /dev/null +++ b/backend/python/parler-tts/install.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -ex + +SKIP_CONDA=${SKIP_CONDA:-0} + +# Check if environment exist +conda_env_exists(){ + ! conda list --name "${@}" >/dev/null 2>/dev/null +} + +if [ $SKIP_CONDA -eq 1 ]; then + echo "Skipping conda environment installation" +else + export PATH=$PATH:/opt/conda/bin + if conda_env_exists "parler" ; then + echo "Creating virtual environment..." + conda env create --name parler --file $1 + echo "Virtual environment created." 
+ else + echo "Virtual environment already exists." + fi +fi + +if [ $SKIP_CONDA -ne 1 ]; then + # Activate conda environment + source activate parler + # https://github.com/descriptinc/audiotools/issues/101 + # incompatible protobuf versions. + curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o $CONDA_PREFIX/lib/python3.11/site-packages/google/protobuf/internal/builder.py +fi + +if [ "$PIP_CACHE_PURGE" = true ] ; then + if [ $SKIP_CONDA -ne 1 ]; then + # Activate conda environment + source activate parler + fi + + pip cache purge +fi \ No newline at end of file diff --git a/backend/python/parler-tts/parler-nvidia.yml b/backend/python/parler-tts/parler-nvidia.yml new file mode 100644 index 00000000..ed925e94 --- /dev/null +++ b/backend/python/parler-tts/parler-nvidia.yml @@ -0,0 +1,48 @@ +name: parler +channels: + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - bzip2=1.0.8=h7b6447c_0 + - ca-certificates=2023.08.22=h06a4308_0 + - ld_impl_linux-64=2.38=h1181459_1 + - libffi=3.4.4=h6a678d5_0 + - libgcc-ng=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libstdcxx-ng=11.2.0=h1234567_1 + - libuuid=1.41.5=h5eee18b_0 + - ncurses=6.4=h6a678d5_0 + - openssl=3.0.11=h7f8727e_2 + - pip=23.2.1=py311h06a4308_0 + - python=3.11.5=h955ad1f_0 + - readline=8.2=h5eee18b_0 + - setuptools=68.0.0=py311h06a4308_0 + - sqlite=3.41.2=h5eee18b_0 + - tk=8.6.12=h1ccaba5_0 + - tzdata=2023c=h04d1e81_0 + - wheel=0.41.2=py311h06a4308_0 + - xz=5.4.2=h5eee18b_0 + - zlib=1.2.13=h5eee18b_0 + - pip: + - accelerate>=0.11.0 + - grpcio==1.59.0 + - numpy==1.26.0 + - nvidia-cublas-cu12==12.1.3.1 + - nvidia-cuda-cupti-cu12==12.1.105 + - nvidia-cuda-nvrtc-cu12==12.1.105 + - nvidia-cuda-runtime-cu12==12.1.105 + - nvidia-cudnn-cu12==8.9.2.26 + - nvidia-cufft-cu12==11.0.2.54 + - nvidia-curand-cu12==10.3.2.106 + - nvidia-cusolver-cu12==11.4.5.107 + - nvidia-cusparse-cu12==12.1.0.106 + - nvidia-nccl-cu12==2.18.1 + - nvidia-nvjitlink-cu12==12.2.140 + - nvidia-nvtx-cu12==12.1.105 + - torch==2.1.0 + - transformers>=4.34.0 + - descript-audio-codec + - sentencepiece + - git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16 +prefix: /opt/conda/envs/diffusers diff --git a/backend/python/parler-tts/parler.yml b/backend/python/parler-tts/parler.yml new file mode 100644 index 00000000..fd0c3cb6 --- /dev/null +++ b/backend/python/parler-tts/parler.yml @@ -0,0 +1,36 @@ +name: parler +channels: + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - bzip2=1.0.8=h7b6447c_0 + - ca-certificates=2023.08.22=h06a4308_0 + - ld_impl_linux-64=2.38=h1181459_1 + - libffi=3.4.4=h6a678d5_0 + - libgcc-ng=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libstdcxx-ng=11.2.0=h1234567_1 + - libuuid=1.41.5=h5eee18b_0 + - ncurses=6.4=h6a678d5_0 + - openssl=3.0.11=h7f8727e_2 + - pip=23.2.1=py311h06a4308_0 + - python=3.11.5=h955ad1f_0 + - readline=8.2=h5eee18b_0 + - setuptools=68.0.0=py311h06a4308_0 + - sqlite=3.41.2=h5eee18b_0 + - tk=8.6.12=h1ccaba5_0 + - tzdata=2023c=h04d1e81_0 + - wheel=0.41.2=py311h06a4308_0 + - xz=5.4.2=h5eee18b_0 + - zlib=1.2.13=h5eee18b_0 + - pip: + - accelerate>=0.11.0 + - numpy==1.26.0 + - grpcio==1.59.0 + - torch==2.1.0 + - transformers>=4.34.0 + - descript-audio-codec + - sentencepiece + - git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16 +prefix: /opt/conda/envs/parler diff --git a/backend/python/parler-tts/parler_tts_server.py 
b/backend/python/parler-tts/parler_tts_server.py new file mode 100644 index 00000000..655990d7
--- /dev/null
+++ b/backend/python/parler-tts/parler_tts_server.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+"""
+Extra gRPC server for ParlerTTSForConditionalGeneration models.
+"""
+from concurrent import futures
+
+import argparse
+import signal
+import sys
+import os
+
+import time
+import backend_pb2
+import backend_pb2_grpc
+
+import grpc
+
+from scipy.io.wavfile import write as write_wav
+
+from parler_tts import ParlerTTSForConditionalGeneration
+from transformers import AutoTokenizer
+import soundfile as sf
+import torch
+
+_ONE_DAY_IN_SECONDS = 60 * 60 * 24
+
+# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
+MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
+
+# Implement the BackendServicer class with the service methods
+class BackendServicer(backend_pb2_grpc.BackendServicer):
+    """
+    A gRPC servicer for the backend service.
+
+    This class implements the gRPC methods for the backend service, including Health, LoadModel, and TTS.
+    """
+    def Health(self, request, context):
+        """
+        A gRPC method that returns the health status of the backend service.
+
+        Args:
+            request: A HealthRequest object that contains the request parameters.
+            context: A grpc.ServicerContext object that provides information about the RPC.
+
+        Returns:
+            A Reply object that contains the health status of the backend service.
+        """
+        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
+
+    def LoadModel(self, request, context):
+        """
+        A gRPC method that loads a model into memory.
+
+        Args:
+            request: A LoadModelRequest object that contains the request parameters.
+            context: A grpc.ServicerContext object that provides information about the RPC.
+
+        Returns:
+            A Result object that contains the result of the LoadModel operation.
+        """
+        model_name = request.Model
+        device = "cuda:0" if torch.cuda.is_available() else "cpu"
+        try:
+            self.model = ParlerTTSForConditionalGeneration.from_pretrained(model_name).to(device)
+            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        except Exception as err:
+            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+
+        return backend_pb2.Result(message="Model loaded successfully", success=True)
+
+    def TTS(self, request, context):
+        model_name = request.model
+        voice = request.voice
+        if voice == "":
+            voice = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
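+        # In Parler-TTS the "voice" string above is a natural-language speaker
+        # description, while request.text (tokenized as prompt_input_ids below)
+        # is the text that actually gets spoken.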
+ if model_name == "": + return backend_pb2.Result(success=False, message="request.model is required") + try: + device = "cuda:0" if torch.cuda.is_available() else "cpu" + input_ids = self.tokenizer(voice, return_tensors="pt").input_ids.to(device) + prompt_input_ids = self.tokenizer(request.text, return_tensors="pt").input_ids.to(device) + + generation = self.model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids) + audio_arr = generation.cpu().numpy().squeeze() + print("[parler-tts] TTS generated!", file=sys.stderr) + sf.write(request.dst, audio_arr, self.model.config.sampling_rate) + print("[parler-tts] TTS saved to", request.dst, file=sys.stderr) + print("[parler-tts] TTS for", file=sys.stderr) + print(request, file=sys.stderr) + except Exception as err: + return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") + return backend_pb2.Result(success=True) + + +def serve(address): + server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) + backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) + server.add_insecure_port(address) + server.start() + print("[parler-tts] Server started. Listening on: " + address, file=sys.stderr) + + # Define the signal handler function + def signal_handler(sig, frame): + print("[parler-tts] Received termination signal. Shutting down...") + server.stop(0) + sys.exit(0) + + # Set the signal handlers for SIGINT and SIGTERM + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGTERM, signal_handler) + + try: + while True: + time.sleep(_ONE_DAY_IN_SECONDS) + except KeyboardInterrupt: + server.stop(0) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Run the gRPC server.") + parser.add_argument( + "--addr", default="localhost:50051", help="The address to bind the server to." 
+ ) + args = parser.parse_args() + print(f"[parler-tts] startup: {args}", file=sys.stderr) + serve(args.addr) diff --git a/backend/python/parler-tts/run.sh b/backend/python/parler-tts/run.sh new file mode 100644 index 00000000..08e42198 --- /dev/null +++ b/backend/python/parler-tts/run.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +## +## A bash script wrapper that runs the parler-tts server with conda + +echo "Launching gRPC server for parler-tts" + +export PATH=$PATH:/opt/conda/bin + +# Activate conda environment +source activate parler + +# get the directory where the bash script is located +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +python $DIR/parler_tts_server.py $@ diff --git a/backend/python/parler-tts/test.sh b/backend/python/parler-tts/test.sh new file mode 100644 index 00000000..1bd15fd1 --- /dev/null +++ b/backend/python/parler-tts/test.sh @@ -0,0 +1,11 @@ +#!/bin/bash +## +## A bash script wrapper that runs the transformers server with conda + +# Activate conda environment +source activate parler + +# get the directory where the bash script is located +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +python -m unittest $DIR/test_parler.py \ No newline at end of file diff --git a/backend/python/parler-tts/test_parler.py b/backend/python/parler-tts/test_parler.py new file mode 100644 index 00000000..ce9b66ac --- /dev/null +++ b/backend/python/parler-tts/test_parler.py @@ -0,0 +1,81 @@ +""" +A test script to test the gRPC service +""" +import unittest +import subprocess +import time +import backend_pb2 +import backend_pb2_grpc + +import grpc + + +class TestBackendServicer(unittest.TestCase): + """ + TestBackendServicer is the class that tests the gRPC service + """ + def setUp(self): + """ + This method sets up the gRPC service by starting the server + """ + self.service = subprocess.Popen(["python3", "parler_tts_server.py", "--addr", "localhost:50051"]) + time.sleep(10) + + def tearDown(self) -> None: + """ + This method tears down the gRPC service by terminating the server + """ + self.service.terminate() + self.service.wait() + + def test_server_startup(self): + """ + This method tests if the server starts up successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.Health(backend_pb2.HealthMessage()) + self.assertEqual(response.message, b'OK') + except Exception as err: + print(err) + self.fail("Server failed to start") + finally: + self.tearDown() + + def test_load_model(self): + """ + This method tests if the model is loaded successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1")) + self.assertTrue(response.success) + self.assertEqual(response.message, "Model loaded successfully") + except Exception as err: + print(err) + self.fail("LoadModel service failed") + finally: + self.tearDown() + + def test_tts(self): + """ + This method tests if the embeddings are generated successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1")) + self.assertTrue(response.success) + tts_request = backend_pb2.TTSRequest(text="Hey, how are you doing today?") + tts_response = 
stub.TTS(tts_request) + self.assertIsNotNone(tts_response) + except Exception as err: + print(err) + self.fail("TTS service failed") + finally: + self.tearDown() \ No newline at end of file diff --git a/backend/python/transformers-musicgen/run.sh b/backend/python/transformers-musicgen/run.sh index d3dcb968..3d3ffcfd 100644 --- a/backend/python/transformers-musicgen/run.sh +++ b/backend/python/transformers-musicgen/run.sh @@ -8,7 +8,7 @@ echo "Launching gRPC server for transformers-musicgen" export PATH=$PATH:/opt/conda/bin # Activate conda environment -source activate transformers-musicgen +source activate transformers # get the directory where the bash script is located DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" From d5699dbf4f3041eb7f01c996c0a2b3b59319b1bc Mon Sep 17 00:00:00 2001 From: Dave Date: Sat, 13 Apr 2024 13:01:17 -0400 Subject: [PATCH 0299/2895] fix - correct checkout versions (#2029) minor fix - bump some checkout@v3 to checkout@v4 to match and clean up warnings Signed-off-by: Dave Lee --- .github/workflows/dependabot_auto.yml | 2 +- .github/workflows/localaibot_automerge.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml index 51337d20..be3a40a8 100644 --- a/.github/workflows/dependabot_auto.yml +++ b/.github/workflows/dependabot_auto.yml @@ -20,7 +20,7 @@ jobs: skip-commit-verification: true - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Approve a PR if not already approved run: | diff --git a/.github/workflows/localaibot_automerge.yml b/.github/workflows/localaibot_automerge.yml index 74c725f3..6a1ab891 100644 --- a/.github/workflows/localaibot_automerge.yml +++ b/.github/workflows/localaibot_automerge.yml @@ -13,7 +13,7 @@ jobs: if: ${{ github.actor == 'localai-bot' }} steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Approve a PR if not already approved run: | From 6b07ded11909bf8f52a8e6de402ac8bf91201831 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 13 Apr 2024 19:12:54 +0200 Subject: [PATCH 0300/2895] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/README.md b/README.md index 76a5fc08..4c2f68b2 100644 --- a/README.md +++ b/README.md @@ -50,17 +50,12 @@ [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) +- Parler-TTS: https://github.com/mudler/LocalAI/pull/2027 - Landing page: https://github.com/mudler/LocalAI/pull/1922 - Openvino support: https://github.com/mudler/LocalAI/pull/1892 - Vector store: https://github.com/mudler/LocalAI/pull/1795 - All-in-one container image: https://github.com/mudler/LocalAI/issues/1855 - Parallel function calling: https://github.com/mudler/LocalAI/pull/1726 / Tools API support: https://github.com/mudler/LocalAI/pull/1715 -- Upload file API: https://github.com/mudler/LocalAI/pull/1703 -- ROCm container images: https://github.com/mudler/LocalAI/pull/1595 / Intel GPU support (sycl, transformers, diffusers): https://github.com/mudler/LocalAI/issues/1653 -- Mamba support: https://github.com/mudler/LocalAI/pull/1589 -- Start and share models with config file: https://github.com/mudler/LocalAI/pull/1522 -- 🐸 Coqui: https://github.com/mudler/LocalAI/pull/1489 -- Img2vid https://github.com/mudler/LocalAI/pull/1442 Hot topics (looking for contributors): - Backends v2: 
https://github.com/mudler/LocalAI/issues/1126

From 4486db912b62e31bffe662b977a31567e62ecbfc Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 13 Apr 2024 22:57:46 +0200
Subject: [PATCH 0301/2895] Update quickstart.md

Signed-off-by: Ettore Di Giacinto
---
 docs/content/docs/getting-started/quickstart.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md
index ff1dc6a7..ab45e5aa 100644
--- a/docs/content/docs/getting-started/quickstart.md
+++ b/docs/content/docs/getting-started/quickstart.md
@@ -169,7 +169,7 @@ Call functions
```bash -curl https://localhost:8080/v1/chat/completions \ +curl http://localhost:8080/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ "model": "gpt-4", From b739cbb86b9734bd62d4f63fad6583cf97059ea5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 14 Apr 2024 10:57:07 +0200 Subject: [PATCH 0302/2895] Revert "build(deps): bump the pip group across 4 directories with 8 updates" (#2030) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert "build(deps): bump the pip group across 4 directories with 8 updates (…" This reverts commit e0dee52a2ab811fccc18f309a6c5fefcb4725448. --- docs/data/version.json | 2 +- examples/functions/requirements.txt | 2 +- examples/langchain-chroma/requirements.txt | 4 ++-- .../langchainpy-localai-example/requirements.txt | 12 ++++++------ examples/streamlit-bot/requirements.txt | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/data/version.json b/docs/data/version.json index 6a618115..1b6a2161 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.12.4" + "version": "v2.12.3" } diff --git a/examples/functions/requirements.txt b/examples/functions/requirements.txt index 759c5b03..7164e011 100644 --- a/examples/functions/requirements.txt +++ b/examples/functions/requirements.txt @@ -1,2 +1,2 @@ -langchain==0.1.0 +langchain==0.0.234 openai==0.27.8 diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt index cdf466b9..b9e649c5 100644 --- a/examples/langchain-chroma/requirements.txt +++ b/examples/langchain-chroma/requirements.txt @@ -1,4 +1,4 @@ -langchain==0.1.0 +langchain==0.0.160 openai==0.27.6 chromadb==0.3.21 -llama-index==0.9.36 \ No newline at end of file +llama-index==0.6.2 \ No newline at end of file diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index 1e63b0bf..2de5bcf0 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -1,16 +1,16 @@ -aiohttp==3.9.2 +aiohttp==3.8.4 aiosignal==1.3.1 async-timeout==4.0.2 attrs==23.1.0 -certifi==2023.7.22 +certifi==2022.12.7 charset-normalizer==3.1.0 colorama==0.4.6 dataclasses-json==0.5.7 debugpy==1.6.7 frozenlist==1.3.3 greenlet==2.0.2 -idna==3.7 -langchain==0.1.0 +idna==3.4 +langchain==0.0.159 marshmallow==3.19.0 marshmallow-enum==1.5.1 multidict==6.0.4 @@ -22,11 +22,11 @@ openapi-schema-pydantic==1.2.4 packaging==23.1 pydantic==1.10.7 PyYAML==6.0 -requests==2.31.0 +requests==2.29.0 SQLAlchemy==2.0.12 tenacity==8.2.2 tqdm==4.65.0 typing-inspect==0.8.0 typing_extensions==4.5.0 -urllib3==1.26.18 +urllib3==1.26.15 yarl==1.9.2 diff --git a/examples/streamlit-bot/requirements.txt b/examples/streamlit-bot/requirements.txt index 1fcd5093..ae527c76 100644 --- a/examples/streamlit-bot/requirements.txt +++ b/examples/streamlit-bot/requirements.txt @@ -1,2 +1,2 @@ -streamlit==1.30.0 +streamlit==1.26.0 requests \ No newline at end of file From 57bd365d876ae55f950821707485f183a6f6685a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 15 Apr 2024 01:31:43 +0200 Subject: [PATCH 0303/2895] :arrow_up: Update docs version mudler/LocalAI (#2032) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- 
docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index 1b6a2161..6a618115 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.12.3" + "version": "v2.12.4" } From de3a1a0a8e1dcf22aed34cad336962ec53ed89cc Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 15 Apr 2024 01:35:44 +0200 Subject: [PATCH 0304/2895] :arrow_up: Update ggerganov/llama.cpp (#2033) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d5bc3739..04745f39 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=4bd0f93e4ab4fe6682e7d0241c1bdec1397e954a +CPPLLAMA_VERSION?=1958f7e06ca2d2e3ab5698cc67513ba359144d8e # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From e843d7df0e8b177ab122a9f7bfa7196274ccd204 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 15 Apr 2024 19:47:11 +0200 Subject: [PATCH 0305/2895] feat(grpc): return consumed token count and update response accordingly (#2035) Fixes: #1920 --- backend/backend.proto | 2 ++ backend/cpp/llama/grpc-server.cpp | 8 ++++++++ core/backend/llm.go | 6 ++++++ core/services/openai.go | 8 ++++---- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/backend/backend.proto b/backend/backend.proto index 56d919ef..62e1a1a6 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -114,6 +114,8 @@ message PredictOptions { // The response message containing the result message Reply { bytes message = 1; + int32 tokens = 2; + int32 prompt_tokens = 3; } message ModelOptions { diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index a2e39a9c..6fb08658 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -2332,6 +2332,10 @@ public: std::string completion_text = result.result_json.value("content", ""); reply.set_message(completion_text); + int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0); + reply.set_tokens(tokens_predicted); + int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0); + reply.set_prompt_tokens(tokens_evaluated); // Send the reply writer->Write(reply); @@ -2357,6 +2361,10 @@ public: task_result result = llama.queue_results.recv(task_id); if (!result.error && result.stop) { completion_text = result.result_json.value("content", ""); + int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0); + int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0); + reply->set_prompt_tokens(tokens_evaluated); + reply->set_tokens(tokens_predicted); reply->set_message(completion_text); } else diff --git a/core/backend/llm.go b/core/backend/llm.go index 1878e87a..75766d78 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -189,6 +189,12 @@ func (llmbs *LLMBackendService) Inference(ctx context.Context, req *LLMRequest, } else { go func() { reply, err := inferenceModel.Predict(ctx, grpcPredOpts) + if tokenUsage.Prompt == 0 { + tokenUsage.Prompt = int(reply.PromptTokens) + } + if tokenUsage.Completion == 0 { + tokenUsage.Completion = int(reply.Tokens) + } if err != nil { 
rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Error: err} close(rawResultChannel) diff --git a/core/services/openai.go b/core/services/openai.go index 0f61d6f4..3fa041f5 100644 --- a/core/services/openai.go +++ b/core/services/openai.go @@ -160,7 +160,7 @@ func (oais *OpenAIService) GenerateTextFromRequest(request *schema.OpenAIRequest bc, request, err := oais.getConfig(request) if err != nil { - log.Error().Msgf("[oais::GenerateTextFromRequest] error getting configuration: %q", err) + log.Error().Err(err).Msgf("[oais::GenerateTextFromRequest] error getting configuration") return } @@ -259,7 +259,7 @@ func (oais *OpenAIService) GenerateTextFromRequest(request *schema.OpenAIRequest // If any of the setup goroutines experienced an error, quit early here. if setupError != nil { go func() { - log.Error().Msgf("[OAIS GenerateTextFromRequest] caught an error during setup: %q", setupError) + log.Error().Err(setupError).Msgf("[OAIS GenerateTextFromRequest] caught an error during setup") rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: setupError} close(rawFinalResultChannel) }() @@ -603,7 +603,7 @@ func (oais *OpenAIService) GenerateFromMultipleMessagesChatRequest(request *sche Usage: schema.OpenAIUsage{ PromptTokens: rawResult.Value.Usage.Prompt, CompletionTokens: rawResult.Value.Usage.Completion, - TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Prompt, + TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion, }, } @@ -644,7 +644,7 @@ func (oais *OpenAIService) GenerateFromMultipleMessagesChatRequest(request *sche Usage: schema.OpenAIUsage{ PromptTokens: rawResult.Value.Usage.Prompt, CompletionTokens: rawResult.Value.Usage.Completion, - TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Prompt, + TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion, }, } From c751a4ac06bab3736d464d16cadf02a04f822bb5 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Mon, 15 Apr 2024 14:47:51 -0500 Subject: [PATCH 0306/2895] fix: remove build path from help text documentation (#2037) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- docs/content/docs/advanced/advanced-usage.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md index dace5803..4bd16030 100644 --- a/docs/content/docs/advanced/advanced-usage.md +++ b/docs/content/docs/advanced/advanced-usage.md @@ -384,6 +384,8 @@ docker run --env-file .env localai You can control LocalAI with command line arguments, to specify a binding address, or the number of threads. Any command line parameter can be specified via an environment variable. 
+In the help text below, BASEPATH is the location that local-ai is being executed from + #### Global Flags | Parameter | Default | Description | Environment Variable | |-----------|---------|-------------|----------------------| @@ -393,13 +395,13 @@ You can control LocalAI with command line arguments, to specify a binding addres #### Storage Flags | Parameter | Default | Description | Environment Variable | |-----------|---------|-------------|----------------------| -| --models-path | /home/cryptk/Documents/sourcecode/LocalAI/models | Path containing models used for inferencing | $LOCALAI_MODELS_PATH | +| --models-path | BASEPATH/models | Path containing models used for inferencing | $LOCALAI_MODELS_PATH | | --backend-assets-path |/tmp/localai/backend_data | Path used to extract libraries that are required by some of the backends in runtime | $LOCALAI_BACKEND_ASSETS_PATH | | --image-path | /tmp/generated/images | Location for images generated by backends (e.g. stablediffusion) | $LOCALAI_IMAGE_PATH | | --audio-path | /tmp/generated/audio | Location for audio generated by backends (e.g. piper) | $LOCALAI_AUDIO_PATH | | --upload-path | /tmp/localai/upload | Path to store uploads from files api | $LOCALAI_UPLOAD_PATH | | --config-path | /tmp/localai/config | | $LOCALAI_CONFIG_PATH | -| --localai-config-dir | /home/cryptk/Documents/sourcecode/LocalAI/configuration | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | $LOCALAI_CONFIG_DIR | +| --localai-config-dir | BASEPATH/configuration | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | $LOCALAI_CONFIG_DIR | | --models-config-file | STRING | YAML file containing a list of model backend configs | $LOCALAI_MODELS_CONFIG_FILE | #### Models Flags From 538a086309b91f4594d5513c0fd88e981877a83d Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Mon, 15 Apr 2024 15:13:59 -0500 Subject: [PATCH 0307/2895] fix: previous CLI rework broke debug logging (#2036) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> Co-authored-by: Dave --- core/cli/run.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/cli/run.go b/core/cli/run.go index c3b186c0..cafc0b54 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -60,7 +60,7 @@ func (r *RunCMD) Run(ctx *Context) error { config.WithYAMLConfigPreload(r.PreloadModelsConfig), config.WithModelPath(r.ModelsPath), config.WithContextSize(r.ContextSize), - config.WithDebug(ctx.Debug), + config.WithDebug(*ctx.LogLevel == "debug"), config.WithImageDir(r.ImagePath), config.WithAudioDir(r.AudioPath), config.WithUploadDir(r.UploadPath), From b72c6cc9fc6c16db301c2b0d992ba03c348f43b1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 20:52:39 +0000 Subject: [PATCH 0308/2895] build(deps): bump softprops/action-gh-release from 1 to 2 (#2039) Bumps [softprops/action-gh-release](https://github.com/softprops/action-gh-release) from 1 to 2. - [Release notes](https://github.com/softprops/action-gh-release/releases) - [Changelog](https://github.com/softprops/action-gh-release/blob/master/CHANGELOG.md) - [Commits](https://github.com/softprops/action-gh-release/compare/v1...v2) --- updated-dependencies: - dependency-name: softprops/action-gh-release dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 33c640cc..dc887fc1 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -92,7 +92,7 @@ jobs: name: LocalAI-linux-${{ matrix.build }} path: release/ - name: Release - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 if: startsWith(github.ref, 'refs/tags/') with: files: | @@ -164,7 +164,7 @@ jobs: name: LocalAI-MacOS-${{ matrix.build }} path: release/ - name: Release - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 if: startsWith(github.ref, 'refs/tags/') with: files: | @@ -211,7 +211,7 @@ jobs: name: LocalAI-MacOS-arm64-${{ matrix.build }} path: release/ - name: Release - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 if: startsWith(github.ref, 'refs/tags/') with: files: | From 46609e936e5e644671855b004e89317300d3cfb9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 21:37:06 +0000 Subject: [PATCH 0309/2895] build(deps): bump dependabot/fetch-metadata from 1.3.4 to 2.0.0 (#2040) Bumps [dependabot/fetch-metadata](https://github.com/dependabot/fetch-metadata) from 1.3.4 to 2.0.0. - [Release notes](https://github.com/dependabot/fetch-metadata/releases) - [Commits](https://github.com/dependabot/fetch-metadata/compare/v1.3.4...v2.0.0) --- updated-dependencies: - dependency-name: dependabot/fetch-metadata dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/dependabot_auto.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml index be3a40a8..8e32aee1 100644 --- a/.github/workflows/dependabot_auto.yml +++ b/.github/workflows/dependabot_auto.yml @@ -14,7 +14,7 @@ jobs: steps: - name: Dependabot metadata id: metadata - uses: dependabot/fetch-metadata@v1.3.4 + uses: dependabot/fetch-metadata@v2.0.0 with: github-token: "${{ secrets.GITHUB_TOKEN }}" skip-commit-verification: true From 320d8a48d9bd09b5fda1c4330d8d693ccc705fcc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 22:02:44 +0000 Subject: [PATCH 0310/2895] build(deps): bump github/codeql-action from 2 to 3 (#2041) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2 to 3. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/v2...v3) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/secscan.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/secscan.yaml b/.github/workflows/secscan.yaml index 884b84d5..d9743d9e 100644 --- a/.github/workflows/secscan.yaml +++ b/.github/workflows/secscan.yaml @@ -24,7 +24,7 @@ jobs: args: '-no-fail -fmt sarif -out results.sarif ./...' - name: Upload SARIF file if: ${{ github.actor != 'dependabot[bot]' }} - uses: github/codeql-action/upload-sarif@v2 + uses: github/codeql-action/upload-sarif@v3 with: # Path to SARIF file relative to the root of the repository sarif_file: results.sarif From cdece3879f4658eaccb3394d9e29b9534c8b773b Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 16 Apr 2024 00:47:29 +0200 Subject: [PATCH 0311/2895] :arrow_up: Update ggerganov/llama.cpp (#2043) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 04745f39..37130567 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=1958f7e06ca2d2e3ab5698cc67513ba359144d8e +CPPLLAMA_VERSION?=7593639ce335e8d7f89aa9a54d616951f273af60 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 0cc1ad21889d9dca21f71dfe7f47a87a0ddf0012 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 16 Apr 2024 01:27:52 +0200 Subject: [PATCH 0312/2895] :arrow_up: Update ggerganov/whisper.cpp (#2042) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 37130567..7cde8fa7 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=8f253ef3af1c62c04316ba4afa7145fc4d701a8c +WHISPER_CPP_VERSION?=9fab28135c7867bb7eccd9ebcd2ea8d52e42ca81 # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From fdec8a9d00a034ccd8e075008edd165147edf328 Mon Sep 17 00:00:00 2001 From: Dave Date: Mon, 15 Apr 2024 21:46:36 -0400 Subject: [PATCH 0313/2895] fix: action-tmate back to upstream, dead code removal (#2038) cleanup: upstream action-tmate has taken my PR, drop master reference. 
Also remove dead code from api.go Signed-off-by: Dave Lee --- .github/workflows/test.yml | 6 +++--- core/http/api.go | 18 ------------------ 2 files changed, 3 insertions(+), 21 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 29bd3e08..156294b5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -121,7 +121,7 @@ jobs: PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: dave-gray101/action-tmate@master + uses: mxschmitt/action-tmate@v3.18 with: connect-timeout-seconds: 180 @@ -174,7 +174,7 @@ jobs: make run-e2e-aio - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: dave-gray101/action-tmate@master + uses: mxschmitt/action-tmate@v3.18 with: connect-timeout-seconds: 180 @@ -209,6 +209,6 @@ jobs: BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: dave-gray101/action-tmate@master + uses: mxschmitt/action-tmate@v3.18 with: connect-timeout-seconds: 180 \ No newline at end of file diff --git a/core/http/api.go b/core/http/api.go index 5c9095ea..7094899a 100644 --- a/core/http/api.go +++ b/core/http/api.go @@ -108,24 +108,6 @@ func App(application *core.Application) (*fiber.App, error) { return c.Next() } - // // Check for api_keys.json file - // fileContent, err := os.ReadFile("api_keys.json") - // if err == nil { - // // Parse JSON content from the file - // var fileKeys []string - // err := json.Unmarshal(fileContent, &fileKeys) - // if err != nil { - // return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"}) - // } - - // // Add file keys to options.ApiKeys - // application.ApplicationConfig.ApiKeys = append(application.ApplicationConfig.ApiKeys, fileKeys...) - // } - - // if len(application.ApplicationConfig.ApiKeys) == 0 { - // return c.Next() - // } - authHeader := readAuthHeader(c) if authHeader == "" { return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"}) From df4a13a08bf91491652e7baa65bd1eafa2c0ff44 Mon Sep 17 00:00:00 2001 From: Adrien Brault Date: Tue, 16 Apr 2024 11:10:23 +0200 Subject: [PATCH 0314/2895] docs: fix stores link (#2044) Signed-off-by: Adrien Brault --- docs/content/docs/overview.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md index 6aede1d6..5224bc49 100644 --- a/docs/content/docs/overview.md +++ b/docs/content/docs/overview.md @@ -100,7 +100,7 @@ Note that this started just as a fun weekend project by [mudler](https://github. 
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/) - 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/) - 🆕 [Vision API](https://localai.io/features/gpt-vision/) -- 💾 [Stores](https://localai.io/features/stores) +- 💾 [Stores](https://localai.io/stores) ## Contribute and help From 33c78d2228891caacb9d8bc7dc2c567caaf12a53 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 16 Apr 2024 15:54:14 +0200 Subject: [PATCH 0315/2895] feat(store): add Golang client (#1977) This adds a basic store client for Go Signed-off-by: Ettore Di Giacinto --- core/clients/store.go | 151 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 core/clients/store.go diff --git a/core/clients/store.go b/core/clients/store.go new file mode 100644 index 00000000..f737ee42 --- /dev/null +++ b/core/clients/store.go @@ -0,0 +1,151 @@ +package clients + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" +) + +// Define a struct to hold the store API client +type StoreClient struct { + BaseURL string + Client *http.Client +} + +type SetRequest struct { + Keys [][]float32 `json:"keys"` + Values []string `json:"values"` +} + +type GetRequest struct { + Keys [][]float32 `json:"keys"` +} + +type GetResponse struct { + Keys [][]float32 `json:"keys"` + Values []string `json:"values"` +} + +type DeleteRequest struct { + Keys [][]float32 `json:"keys"` +} + +type FindRequest struct { + TopK int `json:"topk"` + Key []float32 `json:"key"` +} + +type FindResponse struct { + Keys [][]float32 `json:"keys"` + Values []string `json:"values"` + Similarities []float32 `json:"similarities"` +} + +// Constructor for StoreClient +func NewStoreClient(baseUrl string) *StoreClient { + return &StoreClient{ + BaseURL: baseUrl, + Client: &http.Client{}, + } +} + +// Implement Set method +func (c *StoreClient) Set(req SetRequest) error { + return c.doRequest("stores/set", req) +} + +// Implement Get method +func (c *StoreClient) Get(req GetRequest) (*GetResponse, error) { + body, err := c.doRequestWithResponse("stores/get", req) + if err != nil { + return nil, err + } + + var resp GetResponse + err = json.Unmarshal(body, &resp) + if err != nil { + return nil, err + } + + return &resp, nil +} + +// Implement Delete method +func (c *StoreClient) Delete(req DeleteRequest) error { + return c.doRequest("stores/delete", req) +} + +// Implement Find method +func (c *StoreClient) Find(req FindRequest) (*FindResponse, error) { + body, err := c.doRequestWithResponse("stores/find", req) + if err != nil { + return nil, err + } + + var resp FindResponse + err = json.Unmarshal(body, &resp) + if err != nil { + return nil, err + } + + return &resp, nil +} + +// Helper function to perform a request without expecting a response body +func (c *StoreClient) doRequest(path string, data interface{}) error { + jsonData, err := json.Marshal(data) + if err != nil { + return err + } + + req, err := http.NewRequest("POST", c.BaseURL+"/"+path, bytes.NewBuffer(jsonData)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + + resp, err := c.Client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("API request to %s failed with status code %d", path, resp.StatusCode) + } + + return nil +} + +// Helper function to perform a request and parse the response body +func (c *StoreClient) doRequestWithResponse(path string, data interface{}) ([]byte, error) { 
+ jsonData, err := json.Marshal(data) + if err != nil { + return nil, err + } + + req, err := http.NewRequest("POST", c.BaseURL+"/"+path, bytes.NewBuffer(jsonData)) + if err != nil { + return nil, err + } + req.Header.Set("Content-Type", "application/json") + + resp, err := c.Client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("API request to %s failed with status code %d", path, resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + return body, nil +} From bcaa320f3611deb3c897b51c0240a186b51f21ba Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Apr 2024 19:49:54 +0000 Subject: [PATCH 0316/2895] build(deps): bump the pip group across 4 directories with 8 updates (#2049) Bumps the pip group with 1 update in the /examples/functions directory: [langchain](https://github.com/langchain-ai/langchain). Bumps the pip group with 2 updates in the /examples/langchain-chroma directory: [langchain](https://github.com/langchain-ai/langchain) and [llama-index](https://github.com/run-llama/llama_index). Bumps the pip group with 6 updates in the /examples/langchain/langchainpy-localai-example directory: | Package | From | To | | --- | --- | --- | | [langchain](https://github.com/langchain-ai/langchain) | `0.0.159` | `0.1.0` | | [aiohttp](https://github.com/aio-libs/aiohttp) | `3.8.4` | `3.9.2` | | [certifi](https://github.com/certifi/python-certifi) | `2022.12.7` | `2023.7.22` | | [idna](https://github.com/kjd/idna) | `3.4` | `3.7` | | [requests](https://github.com/psf/requests) | `2.29.0` | `2.31.0` | | [urllib3](https://github.com/urllib3/urllib3) | `1.26.15` | `1.26.18` | Bumps the pip group with 1 update in the /examples/streamlit-bot directory: [streamlit](https://github.com/streamlit/streamlit). 
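An aside on PATCH 0315 above (the new Go store client): the commit adds the client but no usage example. A minimal sketch, assuming the stores API is enabled on a LocalAI instance at `http://localhost:8080` and the import path `github.com/go-skynet/LocalAI/core/clients` (inferred from the module path used elsewhere in this series); the vectors below are illustrative only:

```go
package main

import (
	"fmt"
	"log"

	"github.com/go-skynet/LocalAI/core/clients"
)

func main() {
	c := clients.NewStoreClient("http://localhost:8080")

	// Store two embedding vectors alongside their string payloads.
	if err := c.Set(clients.SetRequest{
		Keys:   [][]float32{{0.1, 0.2, 0.3}, {0.9, 0.8, 0.7}},
		Values: []string{"first document", "second document"},
	}); err != nil {
		log.Fatal(err)
	}

	// Fetch the single nearest neighbour of a query vector.
	resp, err := c.Find(clients.FindRequest{TopK: 1, Key: []float32{0.1, 0.25, 0.3}})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(resp.Values, resp.Similarities)
}
```

Both calls go through the client's `doRequest`/`doRequestWithResponse` helpers shown above, which POST JSON to `stores/set` and `stores/find` respectively and treat any non-200 status as an error.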
Updates `langchain` from 0.0.234 to 0.1.0 - [Release notes](https://github.com/langchain-ai/langchain/releases) - [Commits](https://github.com/langchain-ai/langchain/compare/v0.0.234...v0.1.0) Updates `langchain` from 0.0.160 to 0.1.0 - [Release notes](https://github.com/langchain-ai/langchain/releases) - [Commits](https://github.com/langchain-ai/langchain/compare/v0.0.234...v0.1.0) Updates `llama-index` from 0.6.2 to 0.9.36 - [Release notes](https://github.com/run-llama/llama_index/releases) - [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md) - [Commits](https://github.com/run-llama/llama_index/compare/v0.6.2...v0.9.36) Updates `langchain` from 0.0.159 to 0.1.0 - [Release notes](https://github.com/langchain-ai/langchain/releases) - [Commits](https://github.com/langchain-ai/langchain/compare/v0.0.234...v0.1.0) Updates `aiohttp` from 3.8.4 to 3.9.2 - [Release notes](https://github.com/aio-libs/aiohttp/releases) - [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst) - [Commits](https://github.com/aio-libs/aiohttp/compare/v3.8.4...v3.9.2) Updates `certifi` from 2022.12.7 to 2023.7.22 - [Commits](https://github.com/certifi/python-certifi/compare/2022.12.07...2023.07.22) Updates `idna` from 3.4 to 3.7 - [Release notes](https://github.com/kjd/idna/releases) - [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst) - [Commits](https://github.com/kjd/idna/compare/v3.4...v3.7) Updates `requests` from 2.29.0 to 2.31.0 - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.29.0...v2.31.0) Updates `urllib3` from 1.26.15 to 1.26.18 - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/1.26.15...1.26.18) Updates `streamlit` from 1.26.0 to 1.30.0 - [Release notes](https://github.com/streamlit/streamlit/releases) - [Commits](https://github.com/streamlit/streamlit/compare/1.26.0...1.30.0) --- updated-dependencies: - dependency-name: langchain dependency-type: direct:production dependency-group: pip - dependency-name: langchain dependency-type: direct:production dependency-group: pip - dependency-name: llama-index dependency-type: direct:production dependency-group: pip - dependency-name: langchain dependency-type: direct:production dependency-group: pip - dependency-name: aiohttp dependency-type: direct:production dependency-group: pip - dependency-name: certifi dependency-type: direct:production dependency-group: pip - dependency-name: idna dependency-type: direct:production dependency-group: pip - dependency-name: requests dependency-type: direct:production dependency-group: pip - dependency-name: urllib3 dependency-type: direct:production dependency-group: pip - dependency-name: streamlit dependency-type: direct:production dependency-group: pip ... 
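A note on the fncall fix in PATCH 0317 below: its comment observes that raw newlines break JSON parsing for clients. Go's encoding/json does reject unescaped control characters inside string literals (per RFC 8259), which is why the LLM result is passed through `utils.EscapeNewLines` before unmarshalling. A self-contained sketch of the failure mode, using `strings.ReplaceAll` only as a stand-in for what `EscapeNewLines` is assumed to do:

```go
package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

func main() {
	// An LLM may emit a raw newline inside a JSON string value;
	// control characters must be escaped, so parsing this fails.
	raw := "{\"function\": \"say\", \"arguments\": \"line one\nline two\"}"

	var ss map[string]interface{}
	fmt.Println(json.Unmarshal([]byte(raw), &ss)) // invalid character '\n' in string literal

	// Escaping the newline first makes the payload parseable; this
	// ReplaceAll is a stand-in for utils.EscapeNewLines.
	escaped := strings.ReplaceAll(raw, "\n", "\\n")
	fmt.Println(json.Unmarshal([]byte(escaped), &ss)) // <nil>
}
```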
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/functions/requirements.txt | 2 +- examples/langchain-chroma/requirements.txt | 4 ++-- .../langchainpy-localai-example/requirements.txt | 12 ++++++------ examples/streamlit-bot/requirements.txt | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/examples/functions/requirements.txt b/examples/functions/requirements.txt index 7164e011..759c5b03 100644 --- a/examples/functions/requirements.txt +++ b/examples/functions/requirements.txt @@ -1,2 +1,2 @@ -langchain==0.0.234 +langchain==0.1.0 openai==0.27.8 diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt index b9e649c5..cdf466b9 100644 --- a/examples/langchain-chroma/requirements.txt +++ b/examples/langchain-chroma/requirements.txt @@ -1,4 +1,4 @@ -langchain==0.0.160 +langchain==0.1.0 openai==0.27.6 chromadb==0.3.21 -llama-index==0.6.2 \ No newline at end of file +llama-index==0.9.36 \ No newline at end of file diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index 2de5bcf0..1e63b0bf 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -1,16 +1,16 @@ -aiohttp==3.8.4 +aiohttp==3.9.2 aiosignal==1.3.1 async-timeout==4.0.2 attrs==23.1.0 -certifi==2022.12.7 +certifi==2023.7.22 charset-normalizer==3.1.0 colorama==0.4.6 dataclasses-json==0.5.7 debugpy==1.6.7 frozenlist==1.3.3 greenlet==2.0.2 -idna==3.4 -langchain==0.0.159 +idna==3.7 +langchain==0.1.0 marshmallow==3.19.0 marshmallow-enum==1.5.1 multidict==6.0.4 @@ -22,11 +22,11 @@ openapi-schema-pydantic==1.2.4 packaging==23.1 pydantic==1.10.7 PyYAML==6.0 -requests==2.29.0 +requests==2.31.0 SQLAlchemy==2.0.12 tenacity==8.2.2 tqdm==4.65.0 typing-inspect==0.8.0 typing_extensions==4.5.0 -urllib3==1.26.15 +urllib3==1.26.18 yarl==1.9.2 diff --git a/examples/streamlit-bot/requirements.txt b/examples/streamlit-bot/requirements.txt index ae527c76..1fcd5093 100644 --- a/examples/streamlit-bot/requirements.txt +++ b/examples/streamlit-bot/requirements.txt @@ -1,2 +1,2 @@ -streamlit==1.26.0 +streamlit==1.30.0 requests \ No newline at end of file From 6b06d4e0af4db7a8aa8e131ec2b3af171934862e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 16 Apr 2024 23:20:11 +0200 Subject: [PATCH 0317/2895] fix(fncall): fix regression introduced in #1963 (#2048) Signed-off-by: Dave --------- Signed-off-by: Ettore Di Giacinto Signed-off-by: Dave Co-authored-by: Dave --- core/services/openai.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/core/services/openai.go b/core/services/openai.go index 3fa041f5..7a2679ad 100644 --- a/core/services/openai.go +++ b/core/services/openai.go @@ -778,13 +778,16 @@ func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults // As we have to change the result before processing, we can't stream the answer token-by-token (yet?) 
ss := map[string]interface{}{} // This prevent newlines to break JSON parsing for clients - // s := utils.EscapeNewLines(llmresult) - json.Unmarshal([]byte(llmresult), &ss) + s := utils.EscapeNewLines(llmresult) + if err := json.Unmarshal([]byte(s), &ss); err != nil { + log.Error().Msgf("error unmarshalling JSON: %s", err.Error()) + return results + } // The grammar defines the function name as "function", while OpenAI returns "name" func_name, ok := ss["function"] if !ok { - log.Debug().Msg("ss[function] is not OK!") + log.Debug().Msgf("ss[function] is not OK!, llm result: %q", llmresult) return results } // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object From 5763dc161376c86e4611ee9b7be54073a4fccf5b Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 16 Apr 2024 23:37:50 +0200 Subject: [PATCH 0318/2895] :arrow_up: Update ggerganov/whisper.cpp (#2050) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7cde8fa7..f5b4dc2a 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=9fab28135c7867bb7eccd9ebcd2ea8d52e42ca81 +WHISPER_CPP_VERSION?=a750868428868abd437e228ae5cab763ef3dc387 # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From af8c705ecd1ec47ca1254d7e7b8ab7ca7da89b57 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 17 Apr 2024 23:17:25 +0200 Subject: [PATCH 0319/2895] :arrow_up: Update ggerganov/whisper.cpp (#2060) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f5b4dc2a..fdc7aade 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=a750868428868abd437e228ae5cab763ef3dc387 +WHISPER_CPP_VERSION?=b0c3cbf2e851cf232e432b590dcc514a689ec028 # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From af9e5a2d05d477eedaf1bff08370208d2b4a9d86 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 17 Apr 2024 23:33:49 +0200 Subject: [PATCH 0320/2895] Revert #1963 (#2056) * Revert "fix(fncall): fix regression introduced in #1963 (#2048)" This reverts commit 6b06d4e0af4db7a8aa8e131ec2b3af171934862e. * Revert "fix: action-tmate back to upstream, dead code removal (#2038)" This reverts commit fdec8a9d00a034ccd8e075008edd165147edf328. * Revert "feat(grpc): return consumed token count and update response accordingly (#2035)" This reverts commit e843d7df0e8b177ab122a9f7bfa7196274ccd204. * Revert "refactor: backend/service split, channel-based llm flow (#1963)" This reverts commit eed5706994a3e770a0194cad9d1cfd724ba1b10a. 
* feat(grpc): return consumed token count and update response accordingly Fixes: #1920 Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- .github/workflows/test.yml | 15 +- Makefile | 18 +- backend/go/transcribe/transcript.go | 6 +- backend/go/transcribe/whisper.go | 2 +- core/backend/embeddings.go | 90 +- core/backend/image.go | 259 +----- core/backend/llm.go | 271 ++---- core/backend/options.go | 84 +- core/backend/transcript.go | 41 +- core/backend/tts.go | 77 +- core/cli/run.go | 8 +- core/cli/transcript.go | 19 +- core/cli/tts.go | 26 +- core/config/backend_config.go | 301 ++++++- core/config/backend_config_loader.go | 509 ----------- core/config/exports_test.go | 6 - core/http/api.go | 227 ++--- core/http/api_test.go | 98 +-- core/http/ctx/fiber.go | 65 +- core/http/endpoints/elevenlabs/tts.go | 39 +- .../http/endpoints/localai/backend_monitor.go | 4 +- core/http/endpoints/localai/tts.go | 39 +- core/http/endpoints/openai/assistant.go | 2 +- core/http/endpoints/openai/chat.go | 621 ++++++++++++-- core/http/endpoints/openai/completion.go | 163 +++- core/http/endpoints/openai/edit.go | 78 +- core/http/endpoints/openai/embeddings.go | 65 +- core/http/endpoints/openai/image.go | 216 ++++- core/http/endpoints/openai/inference.go | 55 ++ core/http/endpoints/openai/list.go | 52 +- core/http/endpoints/openai/request.go | 285 ++++++ core/http/endpoints/openai/transcription.go | 28 +- core/schema/{transcription.go => whisper.go} | 2 +- core/services/backend_monitor.go | 30 +- core/services/gallery.go | 116 +-- core/services/list_models.go | 72 -- core/services/openai.go | 808 ------------------ core/startup/startup.go | 91 +- core/state.go | 41 - .../llm text/-completions Stream.bru | 25 - pkg/concurrency/concurrency.go | 135 --- pkg/concurrency/concurrency_test.go | 101 --- pkg/concurrency/types.go | 6 - pkg/grpc/backend.go | 2 +- pkg/grpc/base/base.go | 4 +- pkg/grpc/client.go | 4 +- pkg/grpc/embed.go | 4 +- pkg/grpc/interface.go | 2 +- pkg/model/initializers.go | 8 +- pkg/startup/model_preload.go | 85 ++ .../startup}/model_preload_test.go | 5 +- pkg/utils/base64.go | 50 -- 52 files changed, 2295 insertions(+), 3065 deletions(-) delete mode 100644 core/config/backend_config_loader.go delete mode 100644 core/config/exports_test.go create mode 100644 core/http/endpoints/openai/inference.go create mode 100644 core/http/endpoints/openai/request.go rename core/schema/{transcription.go => whisper.go} (90%) delete mode 100644 core/services/list_models.go delete mode 100644 core/services/openai.go delete mode 100644 core/state.go delete mode 100644 examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru delete mode 100644 pkg/concurrency/concurrency.go delete mode 100644 pkg/concurrency/concurrency_test.go delete mode 100644 pkg/concurrency/types.go create mode 100644 pkg/startup/model_preload.go rename {core/services => pkg/startup}/model_preload_test.go (96%) delete mode 100644 pkg/utils/base64.go diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 156294b5..46c4e065 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -121,9 +121,8 @@ jobs: PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 - with: - connect-timeout-seconds: 180 + uses: mxschmitt/action-tmate@v3 + timeout-minutes: 5 tests-aio-container: runs-on: ubuntu-latest @@ -174,9 +173,8 @@ jobs: make 
run-e2e-aio - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 - with: - connect-timeout-seconds: 180 + uses: mxschmitt/action-tmate@v3 + timeout-minutes: 5 tests-apple: runs-on: macOS-14 @@ -209,6 +207,5 @@ jobs: BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 - with: - connect-timeout-seconds: 180 \ No newline at end of file + uses: mxschmitt/action-tmate@v3 + timeout-minutes: 5 \ No newline at end of file diff --git a/Makefile b/Makefile index fdc7aade..6715e91e 100644 --- a/Makefile +++ b/Makefile @@ -301,9 +301,6 @@ clean-tests: rm -rf test-dir rm -rf core/http/backend-assets -halt-backends: ## Used to clean up stray backends sometimes left running when debugging manually - ps | grep 'backend-assets/grpc/' | awk '{print $$1}' | xargs -I {} kill -9 {} - ## Build: build: prepare backend-assets grpcs ## Build the project $(info ${GREEN}I local-ai build info:${RESET}) @@ -368,13 +365,13 @@ run-e2e-image: run-e2e-aio: @echo 'Running e2e AIO tests' - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio test-e2e: @echo 'Running e2e tests' BUILD_TYPE=$(BUILD_TYPE) \ LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e teardown-e2e: rm -rf $(TEST_DIR) || true @@ -382,15 +379,15 @@ teardown-e2e: test-gpt4all: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r $(TEST_PATHS) test-llama: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS) test-llama-gguf: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r $(TEST_PATHS) test-tts: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ @@ -648,10 +645,7 @@ backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libb $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml 
LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/ -# EXPERIMENTAL: -ifeq ($(BUILD_TYPE),metal) - cp $(CURDIR)/sources/go-llama-ggml/llama.cpp/ggml-metal.metal backend-assets/grpc/ -endif + backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/ diff --git a/backend/go/transcribe/transcript.go b/backend/go/transcribe/transcript.go index b38d5b9f..fdfaa974 100644 --- a/backend/go/transcribe/transcript.go +++ b/backend/go/transcribe/transcript.go @@ -21,7 +21,7 @@ func runCommand(command []string) (string, error) { // AudioToWav converts audio to wav for transcribe. // TODO: use https://github.com/mccoyst/ogg? func audioToWav(src, dst string) error { - command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst} + command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst} out, err := runCommand(command) if err != nil { return fmt.Errorf("error: %w out: %s", err, out) @@ -29,8 +29,8 @@ func audioToWav(src, dst string) error { return nil } -func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.TranscriptionResult, error) { - res := schema.TranscriptionResult{} +func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.Result, error) { + res := schema.Result{} dir, err := os.MkdirTemp("", "whisper") if err != nil { diff --git a/backend/go/transcribe/whisper.go b/backend/go/transcribe/whisper.go index a9a62d24..ac93be01 100644 --- a/backend/go/transcribe/whisper.go +++ b/backend/go/transcribe/whisper.go @@ -21,6 +21,6 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error { return err } -func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) { +func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.Result, error) { return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads)) } diff --git a/core/backend/embeddings.go b/core/backend/embeddings.go index 2c63dedc..03ff90b9 100644 --- a/core/backend/embeddings.go +++ b/core/backend/embeddings.go @@ -2,100 +2,14 @@ package backend import ( "fmt" - "time" "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/schema" - "github.com/google/uuid" - "github.com/go-skynet/LocalAI/pkg/concurrency" "github.com/go-skynet/LocalAI/pkg/grpc" - "github.com/go-skynet/LocalAI/pkg/model" + model "github.com/go-skynet/LocalAI/pkg/model" ) -type EmbeddingsBackendService struct { - ml *model.ModelLoader - bcl *config.BackendConfigLoader - appConfig *config.ApplicationConfig -} - -func NewEmbeddingsBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *EmbeddingsBackendService { - return &EmbeddingsBackendService{ - ml: ml, - bcl: bcl, - appConfig: appConfig, - } -} - -func (ebs *EmbeddingsBackendService) Embeddings(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.OpenAIResponse] { - - resultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) - go func(request *schema.OpenAIRequest) { - if request.Model == "" 
{ - request.Model = model.StableDiffusionBackend - } - - bc, request, err := ebs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, ebs.appConfig) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - - items := []schema.Item{} - - for i, s := range bc.InputToken { - // get the model function to call for the result - embedFn, err := modelEmbedding("", s, ebs.ml, bc, ebs.appConfig) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - - embeddings, err := embedFn() - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"}) - } - - for i, s := range bc.InputStrings { - // get the model function to call for the result - embedFn, err := modelEmbedding(s, []int{}, ebs.ml, bc, ebs.appConfig) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - - embeddings, err := embedFn() - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"}) - } - - id := uuid.New().String() - created := int(time.Now().Unix()) - resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. - Data: items, - Object: "list", - } - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: resp} - close(resultChannel) - }(request) - return resultChannel -} - -func modelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig *config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) { +func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) { modelFile := backendConfig.Model grpcOpts := gRPCModelOpts(backendConfig) diff --git a/core/backend/image.go b/core/backend/image.go index affb3bb3..b0cffb0b 100644 --- a/core/backend/image.go +++ b/core/backend/image.go @@ -1,252 +1,18 @@ package backend import ( - "bufio" - "encoding/base64" - "fmt" - "io" - "net/http" - "os" - "path/filepath" - "strconv" - "strings" - "time" - "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/schema" - "github.com/google/uuid" - "github.com/rs/zerolog/log" - "github.com/go-skynet/LocalAI/pkg/concurrency" "github.com/go-skynet/LocalAI/pkg/grpc/proto" - "github.com/go-skynet/LocalAI/pkg/model" + model "github.com/go-skynet/LocalAI/pkg/model" ) -type ImageGenerationBackendService struct { - ml *model.ModelLoader - bcl *config.BackendConfigLoader - appConfig *config.ApplicationConfig - BaseUrlForGeneratedImages string -} - -func NewImageGenerationBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *ImageGenerationBackendService { - return &ImageGenerationBackendService{ - ml: ml, - bcl: bcl, - appConfig: appConfig, - } -} - -func (igbs *ImageGenerationBackendService) GenerateImage(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.OpenAIResponse] { - resultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) - 
go func(request *schema.OpenAIRequest) { - bc, request, err := igbs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, igbs.appConfig) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - - src := "" - if request.File != "" { - - var fileData []byte - // check if input.File is an URL, if so download it and save it - // to a temporary file - if strings.HasPrefix(request.File, "http://") || strings.HasPrefix(request.File, "https://") { - out, err := downloadFile(request.File) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("failed downloading file:%w", err)} - close(resultChannel) - return - } - defer os.RemoveAll(out) - - fileData, err = os.ReadFile(out) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("failed reading file:%w", err)} - close(resultChannel) - return - } - - } else { - // base 64 decode the file and write it somewhere - // that we will cleanup - fileData, err = base64.StdEncoding.DecodeString(request.File) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - } - - // Create a temporary file - outputFile, err := os.CreateTemp(igbs.appConfig.ImageDir, "b64") - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - // write the base64 result - writer := bufio.NewWriter(outputFile) - _, err = writer.Write(fileData) - if err != nil { - outputFile.Close() - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - outputFile.Close() - src = outputFile.Name() - defer os.RemoveAll(src) - } - - log.Debug().Msgf("Parameter Config: %+v", bc) - - switch bc.Backend { - case "stablediffusion": - bc.Backend = model.StableDiffusionBackend - case "tinydream": - bc.Backend = model.TinyDreamBackend - case "": - bc.Backend = model.StableDiffusionBackend - if bc.Model == "" { - bc.Model = "stablediffusion_assets" // TODO: check? 
- } - } - - sizeParts := strings.Split(request.Size, "x") - if len(sizeParts) != 2 { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("invalid value for 'size'")} - close(resultChannel) - return - } - width, err := strconv.Atoi(sizeParts[0]) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("invalid value for 'size'")} - close(resultChannel) - return - } - height, err := strconv.Atoi(sizeParts[1]) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("invalid value for 'size'")} - close(resultChannel) - return - } - - b64JSON := false - if request.ResponseFormat.Type == "b64_json" { - b64JSON = true - } - // src and clip_skip - var result []schema.Item - for _, i := range bc.PromptStrings { - n := request.N - if request.N == 0 { - n = 1 - } - for j := 0; j < n; j++ { - prompts := strings.Split(i, "|") - positive_prompt := prompts[0] - negative_prompt := "" - if len(prompts) > 1 { - negative_prompt = prompts[1] - } - - mode := 0 - step := bc.Step - if step == 0 { - step = 15 - } - - if request.Mode != 0 { - mode = request.Mode - } - - if request.Step != 0 { - step = request.Step - } - - tempDir := "" - if !b64JSON { - tempDir = igbs.appConfig.ImageDir - } - // Create a temporary file - outputFile, err := os.CreateTemp(tempDir, "b64") - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - outputFile.Close() - output := outputFile.Name() + ".png" - // Rename the temporary file - err = os.Rename(outputFile.Name(), output) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - - if request.Seed == nil { - zVal := 0 // Idiomatic way to do this? Actually needed? 
-				request.Seed = &zVal
-			}
-
-			fn, err := imageGeneration(height, width, mode, step, *request.Seed, positive_prompt, negative_prompt, src, output, igbs.ml, bc, igbs.appConfig)
-			if err != nil {
-				resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
-				close(resultChannel)
-				return
-			}
-			if err := fn(); err != nil {
-				resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
-				close(resultChannel)
-				return
-			}
-
-			item := &schema.Item{}
-
-			if b64JSON {
-				defer os.RemoveAll(output)
-				data, err := os.ReadFile(output)
-				if err != nil {
-					resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
-					close(resultChannel)
-					return
-				}
-				item.B64JSON = base64.StdEncoding.EncodeToString(data)
-			} else {
-				base := filepath.Base(output)
-				item.URL = igbs.BaseUrlForGeneratedImages + base
-			}
-
-			result = append(result, *item)
-		}
-	}
-
-	id := uuid.New().String()
-	created := int(time.Now().Unix())
-	resp := &schema.OpenAIResponse{
-		ID:      id,
-		Created: created,
-		Data:    result,
-	}
-	resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: resp}
-	close(resultChannel)
-	}(request)
-	return resultChannel
-}
-
-func imageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig *config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
-
+func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
 	threads := backendConfig.Threads
 	if *threads == 0 && appConfig.Threads != 0 {
 		threads = &appConfig.Threads
 	}
-
 	gRPCOpts := gRPCModelOpts(backendConfig)
-
 	opts := modelOpts(backendConfig, appConfig, []model.Option{
 		model.WithBackendString(backendConfig.Backend),
 		model.WithAssetDir(appConfig.AssetsDestination),
@@ -284,24 +50,3 @@ func imageGeneration(height, width, mode, step, seed int, positive_prompt, negat
 	return fn, nil
 }
-
-// TODO: Replace this function with pkg/downloader - no reason to have a (crappier) bespoke download file fn here, but get things working before that change.
-func downloadFile(url string) (string, error) {
-	// Get the data
-	resp, err := http.Get(url)
-	if err != nil {
-		return "", err
-	}
-	defer resp.Body.Close()
-
-	// Create the file
-	out, err := os.CreateTemp("", "image")
-	if err != nil {
-		return "", err
-	}
-	defer out.Close()
-
-	// Write the body to file
-	_, err = io.Copy(out, resp.Body)
-	return out.Name(), err
-}
diff --git a/core/backend/llm.go b/core/backend/llm.go
index 75766d78..a4d1e5f3 100644
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@@ -11,22 +11,17 @@ import (
 	"github.com/go-skynet/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/core/schema"
-	"github.com/rs/zerolog/log"
-
-	"github.com/go-skynet/LocalAI/pkg/concurrency"
 	"github.com/go-skynet/LocalAI/pkg/gallery"
 	"github.com/go-skynet/LocalAI/pkg/grpc"
 	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/go-skynet/LocalAI/pkg/utils"
 )
 
-type LLMRequest struct {
-	Id          int // TODO Remove if not used.
-	Text        string
-	Images      []string
-	RawMessages []schema.Message
-	// TODO: Other Modalities?
+type LLMResponse struct {
+	Response string // should this be []byte?
+	Usage    TokenUsage
 }
 
 type TokenUsage struct {
@@ -34,94 +29,57 @@ type TokenUsage struct {
 	Completion int
 }
 
-type LLMResponse struct {
-	Request  *LLMRequest
-	Response string // should this be []byte?
-	Usage    TokenUsage
-}
-
-// TODO: Does this belong here or in core/services/openai.go?
-type LLMResponseBundle struct {
-	Request  *schema.OpenAIRequest
-	Response []schema.Choice
-	Usage    TokenUsage
-}
-
-type LLMBackendService struct {
-	bcl        *config.BackendConfigLoader
-	ml         *model.ModelLoader
-	appConfig  *config.ApplicationConfig
-	ftMutex    sync.Mutex
-	cutstrings map[string]*regexp.Regexp
-}
-
-func NewLLMBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *LLMBackendService {
-	return &LLMBackendService{
-		bcl:        bcl,
-		ml:         ml,
-		appConfig:  appConfig,
-		ftMutex:    sync.Mutex{},
-		cutstrings: make(map[string]*regexp.Regexp),
+func ModelInference(ctx context.Context, s string, messages []schema.Message, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
+	modelFile := c.Model
+	threads := c.Threads
+	if *threads == 0 && o.Threads != 0 {
+		threads = &o.Threads
 	}
-}
-
-// TODO: Should ctx param be removed and replaced with hardcoded req.Context?
-func (llmbs *LLMBackendService) Inference(ctx context.Context, req *LLMRequest, bc *config.BackendConfig, enableTokenChannel bool) (
-	resultChannel <-chan concurrency.ErrorOr[*LLMResponse], tokenChannel <-chan concurrency.ErrorOr[*LLMResponse], err error) {
-
-	threads := bc.Threads
-	if (threads == nil || *threads == 0) && llmbs.appConfig.Threads != 0 {
-		threads = &llmbs.appConfig.Threads
-	}
-
-	grpcOpts := gRPCModelOpts(bc)
+	grpcOpts := gRPCModelOpts(c)
 
 	var inferenceModel grpc.Backend
+	var err error
 
-	opts := modelOpts(bc, llmbs.appConfig, []model.Option{
+	opts := modelOpts(c, o, []model.Option{
 		model.WithLoadGRPCLoadModelOpts(grpcOpts),
 		model.WithThreads(uint32(*threads)), // some models uses this to allocate threads during startup
-		model.WithAssetDir(llmbs.appConfig.AssetsDestination),
-		model.WithModel(bc.Model),
-		model.WithContext(llmbs.appConfig.Context),
+		model.WithAssetDir(o.AssetsDestination),
+		model.WithModel(modelFile),
+		model.WithContext(o.Context),
 	})
 
-	if bc.Backend != "" {
-		opts = append(opts, model.WithBackendString(bc.Backend))
+	if c.Backend != "" {
+		opts = append(opts, model.WithBackendString(c.Backend))
 	}
 
-	// Check if bc.Model exists, if it doesn't try to load it from the gallery
-	if llmbs.appConfig.AutoloadGalleries { // experimental
-		if _, err := os.Stat(bc.Model); os.IsNotExist(err) {
+	// Check if the modelFile exists, if it doesn't try to load it from the gallery
+	if o.AutoloadGalleries { // experimental
+		if _, err := os.Stat(modelFile); os.IsNotExist(err) {
 			utils.ResetDownloadTimers()
 			// if we failed to load the model, we try to download it
-			err := gallery.InstallModelFromGalleryByName(llmbs.appConfig.Galleries, bc.Model, llmbs.appConfig.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction)
+			err := gallery.InstallModelFromGalleryByName(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction)
 			if err != nil {
-				return nil, nil, err
+				return nil, err
 			}
 		}
 	}
 
-	if bc.Backend == "" {
-		log.Debug().Msgf("backend not known for %q, falling back to greedy loader to find it", bc.Model)
-		inferenceModel, err = llmbs.ml.GreedyLoader(opts...)
+	if c.Backend == "" {
+		inferenceModel, err = loader.GreedyLoader(opts...)
 	} else {
-		inferenceModel, err = llmbs.ml.BackendLoader(opts...)
+		inferenceModel, err = loader.BackendLoader(opts...)
 	}
 	if err != nil {
-		log.Error().Err(err).Msg("[llmbs.Inference] failed to load a backend")
-		return
+		return nil, err
 	}
 
-	grpcPredOpts := gRPCPredictOpts(bc, llmbs.appConfig.ModelPath)
-	grpcPredOpts.Prompt = req.Text
-	grpcPredOpts.Images = req.Images
-
-	if bc.TemplateConfig.UseTokenizerTemplate && req.Text == "" {
-		grpcPredOpts.UseTokenizerTemplate = true
-		protoMessages := make([]*proto.Message, len(req.RawMessages), len(req.RawMessages))
-		for i, message := range req.RawMessages {
+	var protoMessages []*proto.Message
+	// if we are using the tokenizer template, we need to convert the messages to proto messages
+	// unless the prompt has already been tokenized (non-chat endpoints + functions)
+	if c.TemplateConfig.UseTokenizerTemplate && s == "" {
+		protoMessages = make([]*proto.Message, len(messages), len(messages))
+		for i, message := range messages {
 			protoMessages[i] = &proto.Message{
 				Role: message.Role,
 			}
@@ -129,32 +87,47 @@ func (llmbs *LLMBackendService) Inference(ctx context.Context, req *LLMRequest,
 			case string:
 				protoMessages[i].Content = ct
 			default:
-				err = fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct)
-				return
+				return nil, fmt.Errorf("Unsupported type for schema.Message.Content for inference: %T", ct)
 			}
 		}
 	}
 
-	tokenUsage := TokenUsage{}
+	// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
+	fn := func() (LLMResponse, error) {
+		opts := gRPCPredictOpts(c, loader.ModelPath)
+		opts.Prompt = s
+		opts.Messages = protoMessages
+		opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
+		opts.Images = images
 
-	promptInfo, pErr := inferenceModel.TokenizeString(ctx, grpcPredOpts)
-	if pErr == nil && promptInfo.Length > 0 {
-		tokenUsage.Prompt = int(promptInfo.Length)
-	}
+		tokenUsage := TokenUsage{}
 
-	rawResultChannel := make(chan concurrency.ErrorOr[*LLMResponse])
-	// TODO this next line is the biggest argument for taking named return values _back_ out!!!
-	var rawTokenChannel chan concurrency.ErrorOr[*LLMResponse]
+		// check the per-model feature flag for usage, since tokenCallback may have a cost.
+		// Defaults to off as for now it is still experimental
+		if c.FeatureFlag.Enabled("usage") {
+			userTokenCallback := tokenCallback
+			if userTokenCallback == nil {
+				userTokenCallback = func(token string, usage TokenUsage) bool {
+					return true
+				}
+			}
 
-	if enableTokenChannel {
-		rawTokenChannel = make(chan concurrency.ErrorOr[*LLMResponse])
+			promptInfo, pErr := inferenceModel.TokenizeString(ctx, opts)
+			if pErr == nil && promptInfo.Length > 0 {
+				tokenUsage.Prompt = int(promptInfo.Length)
+			}
 
-		// TODO Needs better name
-		ss := ""
+			tokenCallback = func(token string, usage TokenUsage) bool {
+				tokenUsage.Completion++
+				return userTokenCallback(token, tokenUsage)
+			}
+		}
+
+		if tokenCallback != nil {
+			ss := ""
 
-		go func() {
 			var partialRune []byte
-			err := inferenceModel.PredictStream(ctx, grpcPredOpts, func(chars []byte) {
+			err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) {
 				partialRune = append(partialRune, chars...)
 
 				for len(partialRune) > 0 {
@@ -164,126 +137,54 @@ func (llmbs *LLMBackendService) Inference(ctx context.Context, req *LLMRequest,
 						break
 					}
 
-					tokenUsage.Completion++
-					rawTokenChannel <- concurrency.ErrorOr[*LLMResponse]{Value: &LLMResponse{
-						Response: string(r),
-						Usage:    tokenUsage,
-					}}
-
+					tokenCallback(string(r), tokenUsage)
 					ss += string(r)
 					partialRune = partialRune[size:]
 				}
 			})
-			close(rawTokenChannel)
+			return LLMResponse{
+				Response: ss,
+				Usage:    tokenUsage,
+			}, err
+		} else {
+			// TODO: Is the chicken bit the only way to get here? is that acceptable?
+			reply, err := inferenceModel.Predict(ctx, opts)
 			if err != nil {
-				rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Error: err}
-			} else {
-				rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Value: &LLMResponse{
-					Response: ss,
-					Usage:    tokenUsage,
-				}}
+				return LLMResponse{}, err
 			}
-			close(rawResultChannel)
-		}()
-	} else {
-		go func() {
-			reply, err := inferenceModel.Predict(ctx, grpcPredOpts)
 			if tokenUsage.Prompt == 0 {
 				tokenUsage.Prompt = int(reply.PromptTokens)
 			}
 			if tokenUsage.Completion == 0 {
 				tokenUsage.Completion = int(reply.Tokens)
 			}
-			if err != nil {
-				rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Error: err}
-				close(rawResultChannel)
-			} else {
-				rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Value: &LLMResponse{
-					Response: string(reply.Message),
-					Usage:    tokenUsage,
-				}}
-				close(rawResultChannel)
-			}
-		}()
+			return LLMResponse{
+				Response: string(reply.Message),
+				Usage:    tokenUsage,
+			}, err
+		}
 	}
 
-	resultChannel = rawResultChannel
-	tokenChannel = rawTokenChannel
-	return
+	return fn, nil
 }
 
-// TODO: Should predInput be a separate param still, or should this fn handle extracting it from request??
-func (llmbs *LLMBackendService) GenerateText(predInput string, request *schema.OpenAIRequest, bc *config.BackendConfig,
-	mappingFn func(*LLMResponse) schema.Choice, enableCompletionChannels bool, enableTokenChannels bool) (
-	// Returns:
-	resultChannel <-chan concurrency.ErrorOr[*LLMResponseBundle], completionChannels []<-chan concurrency.ErrorOr[*LLMResponse], tokenChannels []<-chan concurrency.ErrorOr[*LLMResponse], err error) {
+var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
+var mu sync.Mutex = sync.Mutex{}
 
-	rawChannel := make(chan concurrency.ErrorOr[*LLMResponseBundle])
-	resultChannel = rawChannel
-
-	if request.N == 0 { // number of completions to return
-		request.N = 1
-	}
-	images := []string{}
-	for _, m := range request.Messages {
-		images = append(images, m.StringImages...)
-	}
-
-	for i := 0; i < request.N; i++ {
-
-		individualResultChannel, tokenChannel, infErr := llmbs.Inference(request.Context, &LLMRequest{
-			Text:        predInput,
-			Images:      images,
-			RawMessages: request.Messages,
-		}, bc, enableTokenChannels)
-		if infErr != nil {
-			err = infErr // Avoids complaints about redeclaring err but looks dumb
-			return
-		}
-		completionChannels = append(completionChannels, individualResultChannel)
-		tokenChannels = append(tokenChannels, tokenChannel)
-	}
-
-	go func() {
-		initialBundle := LLMResponseBundle{
-			Request:  request,
-			Response: []schema.Choice{},
-			Usage:    TokenUsage{},
-		}
-
-		wg := concurrency.SliceOfChannelsReducer(completionChannels, rawChannel, func(iv concurrency.ErrorOr[*LLMResponse], ov concurrency.ErrorOr[*LLMResponseBundle]) concurrency.ErrorOr[*LLMResponseBundle] {
-			if iv.Error != nil {
-				ov.Error = iv.Error
-				// TODO: Decide if we should wipe partials or not?
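A note on the streaming path above: PredictStream delivers raw bytes, and a multi-byte UTF-8 character can be split across two callbacks, which is why the code buffers into partialRune and only forwards complete runes. Below is a minimal, self-contained sketch of that technique; the feedChunk helper and the sample input are illustrative, not part of this patch (the patch's elided loop breaks on utf8.RuneError, while this variant adds a utf8.FullRune check so a genuinely invalid byte cannot stall the stream):

package main

import (
	"fmt"
	"unicode/utf8"
)

// feedChunk buffers incoming bytes and emits only complete UTF-8 runes,
// returning whatever partial rune is left over for the next call.
func feedChunk(partial, chunk []byte, emit func(string)) []byte {
	partial = append(partial, chunk...)
	for len(partial) > 0 {
		r, size := utf8.DecodeRune(partial)
		if r == utf8.RuneError && !utf8.FullRune(partial) {
			break // incomplete rune: wait for the next chunk
		}
		emit(string(r))
		partial = partial[size:]
	}
	return partial
}

func main() {
	// "é" is 0xC3 0xA9; deliver it split across two chunks on purpose.
	var partial []byte
	for _, chunk := range [][]byte{[]byte("caf"), {0xC3}, {0xA9}} {
		partial = feedChunk(partial, chunk, func(s string) { fmt.Print(s) })
	}
	fmt.Println() // prints "café"
}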
-				return ov
-			}
-			ov.Value.Usage.Prompt += iv.Value.Usage.Prompt
-			ov.Value.Usage.Completion += iv.Value.Usage.Completion
-
-			ov.Value.Response = append(ov.Value.Response, mappingFn(iv.Value))
-			return ov
-		}, concurrency.ErrorOr[*LLMResponseBundle]{Value: &initialBundle}, true)
-		wg.Wait()
-
-	}()
-
-	return
-}
-
-func (llmbs *LLMBackendService) Finetune(config config.BackendConfig, input, prediction string) string {
+func Finetune(config config.BackendConfig, input, prediction string) string {
 	if config.Echo {
 		prediction = input + prediction
 	}
 
 	for _, c := range config.Cutstrings {
-		llmbs.ftMutex.Lock()
-		reg, ok := llmbs.cutstrings[c]
+		mu.Lock()
+		reg, ok := cutstrings[c]
 		if !ok {
-			llmbs.cutstrings[c] = regexp.MustCompile(c)
-			reg = llmbs.cutstrings[c]
+			cutstrings[c] = regexp.MustCompile(c)
+			reg = cutstrings[c]
 		}
-		llmbs.ftMutex.Unlock()
+		mu.Unlock()
 		prediction = reg.ReplaceAllString(prediction, "")
 	}
diff --git a/core/backend/options.go b/core/backend/options.go
index 0b4e56db..5b303b05 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -10,7 +10,7 @@ import (
 	model "github.com/go-skynet/LocalAI/pkg/model"
 )
 
-func modelOpts(bc *config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
+func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
 	if so.SingleBackend {
 		opts = append(opts, model.WithSingleActiveBackend())
 	}
@@ -19,12 +19,12 @@ func modelOpts(bc *config.BackendConfig, so *config.ApplicationConfig, opts []mo
 		opts = append(opts, model.EnableParallelRequests)
 	}
 
-	if bc.GRPC.Attempts != 0 {
-		opts = append(opts, model.WithGRPCAttempts(bc.GRPC.Attempts))
+	if c.GRPC.Attempts != 0 {
+		opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts))
 	}
 
-	if bc.GRPC.AttemptsSleepTime != 0 {
-		opts = append(opts, model.WithGRPCAttemptsDelay(bc.GRPC.AttemptsSleepTime))
+	if c.GRPC.AttemptsSleepTime != 0 {
+		opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
 	}
 
 	for k, v := range so.ExternalGRPCBackends {
@@ -34,7 +34,7 @@ func modelOpts(bc *config.BackendConfig, so *config.ApplicationConfig, opts []mo
 	return opts
 }
 
-func getSeed(c *config.BackendConfig) int32 {
+func getSeed(c config.BackendConfig) int32 {
 	seed := int32(*c.Seed)
 	if seed == config.RAND_SEED {
 		seed = rand.Int31()
@@ -43,7 +43,7 @@ func getSeed(c *config.BackendConfig) int32 {
 	return seed
 }
 
-func gRPCModelOpts(c *config.BackendConfig) *pb.ModelOptions {
+func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
 	b := 512
 	if c.Batch != 0 {
 		b = c.Batch
@@ -104,47 +104,47 @@ func gRPCModelOpts(c *config.BackendConfig) *pb.ModelOptions {
 	}
 }
 
-func gRPCPredictOpts(bc *config.BackendConfig, modelPath string) *pb.PredictOptions {
+func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOptions {
 	promptCachePath := ""
-	if bc.PromptCachePath != "" {
-		p := filepath.Join(modelPath, bc.PromptCachePath)
+	if c.PromptCachePath != "" {
+		p := filepath.Join(modelPath, c.PromptCachePath)
 		os.MkdirAll(filepath.Dir(p), 0755)
 		promptCachePath = p
 	}
 
 	return &pb.PredictOptions{
-		Temperature: float32(*bc.Temperature),
-		TopP: float32(*bc.TopP),
-		NDraft: bc.NDraft,
-		TopK: int32(*bc.TopK),
-		Tokens: int32(*bc.Maxtokens),
-		Threads: int32(*bc.Threads),
-		PromptCacheAll: bc.PromptCacheAll,
-		PromptCacheRO: bc.PromptCacheRO,
+		Temperature: float32(*c.Temperature),
+		TopP: float32(*c.TopP),
+		NDraft: c.NDraft,
+		TopK: int32(*c.TopK),
+		Tokens: int32(*c.Maxtokens),
+		Threads: int32(*c.Threads),
+		PromptCacheAll: c.PromptCacheAll,
+		PromptCacheRO: c.PromptCacheRO,
 		PromptCachePath: promptCachePath,
-		F16KV: *bc.F16,
-		DebugMode: *bc.Debug,
-		Grammar: bc.Grammar,
-		NegativePromptScale: bc.NegativePromptScale,
-		RopeFreqBase: bc.RopeFreqBase,
-		RopeFreqScale: bc.RopeFreqScale,
-		NegativePrompt: bc.NegativePrompt,
-		Mirostat: int32(*bc.LLMConfig.Mirostat),
-		MirostatETA: float32(*bc.LLMConfig.MirostatETA),
-		MirostatTAU: float32(*bc.LLMConfig.MirostatTAU),
-		Debug: *bc.Debug,
-		StopPrompts: bc.StopWords,
-		Repeat: int32(bc.RepeatPenalty),
-		NKeep: int32(bc.Keep),
-		Batch: int32(bc.Batch),
-		IgnoreEOS: bc.IgnoreEOS,
-		Seed: getSeed(bc),
-		FrequencyPenalty: float32(bc.FrequencyPenalty),
-		MLock: *bc.MMlock,
-		MMap: *bc.MMap,
-		MainGPU: bc.MainGPU,
-		TensorSplit: bc.TensorSplit,
-		TailFreeSamplingZ: float32(*bc.TFZ),
-		TypicalP: float32(*bc.TypicalP),
+		F16KV: *c.F16,
+		DebugMode: *c.Debug,
+		Grammar: c.Grammar,
+		NegativePromptScale: c.NegativePromptScale,
+		RopeFreqBase: c.RopeFreqBase,
+		RopeFreqScale: c.RopeFreqScale,
+		NegativePrompt: c.NegativePrompt,
+		Mirostat: int32(*c.LLMConfig.Mirostat),
+		MirostatETA: float32(*c.LLMConfig.MirostatETA),
+		MirostatTAU: float32(*c.LLMConfig.MirostatTAU),
+		Debug: *c.Debug,
+		StopPrompts: c.StopWords,
+		Repeat: int32(c.RepeatPenalty),
+		NKeep: int32(c.Keep),
+		Batch: int32(c.Batch),
+		IgnoreEOS: c.IgnoreEOS,
+		Seed: getSeed(c),
+		FrequencyPenalty: float32(c.FrequencyPenalty),
+		MLock: *c.MMlock,
+		MMap: *c.MMap,
+		MainGPU: c.MainGPU,
+		TensorSplit: c.TensorSplit,
+		TailFreeSamplingZ: float32(*c.TFZ),
+		TypicalP: float32(*c.TypicalP),
 	}
 }
diff --git a/core/backend/transcript.go b/core/backend/transcript.go
index 6761c2ac..4c3859df 100644
--- a/core/backend/transcript.go
+++ b/core/backend/transcript.go
@@ -7,48 +7,11 @@ import (
 	"github.com/go-skynet/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/pkg/concurrency"
 	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	model "github.com/go-skynet/LocalAI/pkg/model"
 )
 
-type TranscriptionBackendService struct {
-	ml        *model.ModelLoader
-	bcl       *config.BackendConfigLoader
-	appConfig *config.ApplicationConfig
-}
-
-func NewTranscriptionBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *TranscriptionBackendService {
-	return &TranscriptionBackendService{
-		ml:        ml,
-		bcl:       bcl,
-		appConfig: appConfig,
-	}
-}
-
-func (tbs *TranscriptionBackendService) Transcribe(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.TranscriptionResult] {
-	responseChannel := make(chan concurrency.ErrorOr[*schema.TranscriptionResult])
-	go func(request *schema.OpenAIRequest) {
-		bc, request, err := tbs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, tbs.appConfig)
-		if err != nil {
-			responseChannel <- concurrency.ErrorOr[*schema.TranscriptionResult]{Error: fmt.Errorf("failed reading parameters from request:%w", err)}
-			close(responseChannel)
-			return
-		}
-
-		tr, err := modelTranscription(request.File, request.Language, tbs.ml, bc, tbs.appConfig)
-		if err != nil {
-			responseChannel <- concurrency.ErrorOr[*schema.TranscriptionResult]{Error: err}
-			close(responseChannel)
-			return
-		}
-		responseChannel <- concurrency.ErrorOr[*schema.TranscriptionResult]{Value: tr}
-		close(responseChannel)
-	}(request)
-	return responseChannel
-}
-
-func modelTranscription(audio, language string, ml *model.ModelLoader, backendConfig *config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
+func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.Result, error) {
 
 	opts := modelOpts(backendConfig, appConfig, []model.Option{
 		model.WithBackendString(model.WhisperBackend),
diff --git a/core/backend/tts.go b/core/backend/tts.go
index d1fa270d..f97b6202 100644
--- a/core/backend/tts.go
+++ b/core/backend/tts.go
@@ -7,60 +7,29 @@ import (
 	"path/filepath"
 
 	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/schema"
-
-	"github.com/go-skynet/LocalAI/pkg/concurrency"
 	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/go-skynet/LocalAI/pkg/utils"
 )
 
-type TextToSpeechBackendService struct {
-	ml        *model.ModelLoader
-	bcl       *config.BackendConfigLoader
-	appConfig *config.ApplicationConfig
-}
+func generateUniqueFileName(dir, baseName, ext string) string {
+	counter := 1
+	fileName := baseName + ext
 
-func NewTextToSpeechBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *TextToSpeechBackendService {
-	return &TextToSpeechBackendService{
-		ml:        ml,
-		bcl:       bcl,
-		appConfig: appConfig,
+	for {
+		filePath := filepath.Join(dir, fileName)
+		_, err := os.Stat(filePath)
+		if os.IsNotExist(err) {
+			return fileName
+		}
+
+		counter++
+		fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext)
 	}
 }
 
-func (ttsbs *TextToSpeechBackendService) TextToAudioFile(request *schema.TTSRequest) <-chan concurrency.ErrorOr[*string] {
-	responseChannel := make(chan concurrency.ErrorOr[*string])
-	go func(request *schema.TTSRequest) {
-		cfg, err := ttsbs.bcl.LoadBackendConfigFileByName(request.Model, ttsbs.appConfig.ModelPath,
-			config.LoadOptionDebug(ttsbs.appConfig.Debug),
-			config.LoadOptionThreads(ttsbs.appConfig.Threads),
-			config.LoadOptionContextSize(ttsbs.appConfig.ContextSize),
-			config.LoadOptionF16(ttsbs.appConfig.F16),
-		)
-		if err != nil {
-			responseChannel <- concurrency.ErrorOr[*string]{Error: err}
-			close(responseChannel)
-			return
-		}
-
-		if request.Backend != "" {
-			cfg.Backend = request.Backend
-		}
-
-		outFile, _, err := modelTTS(cfg.Backend, request.Input, cfg.Model, request.Voice, ttsbs.ml, ttsbs.appConfig, cfg)
-		if err != nil {
-			responseChannel <- concurrency.ErrorOr[*string]{Error: err}
-			close(responseChannel)
-			return
-		}
-		responseChannel <- concurrency.ErrorOr[*string]{Value: &outFile}
-		close(responseChannel)
-	}(request)
-	return responseChannel
-}
-
-func modelTTS(backend, text, modelFile string, voice string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig *config.BackendConfig) (string, *proto.Result, error) {
+func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (string, *proto.Result, error) {
 	bb := backend
 	if bb == "" {
 		bb = model.PiperBackend
@@ -68,7 +37,3 @@ func modelTTS(backend, text, modelFile string, voice string, loader *model.Model
 
 	grpcOpts := gRPCModelOpts(backendConfig)
 
-	opts := modelOpts(&config.BackendConfig{}, appConfig, []model.Option{
+	opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
 		model.WithBackendString(bb),
 		model.WithModel(modelFile),
 		model.WithContext(appConfig.Context),
@@ -118,19 +87,3 @@ func modelTTS(backend, text, modelFile string, voice string, loader *model.Model
 	return filePath, res, err
 }
-
-func generateUniqueFileName(dir, baseName, ext string) string {
-	counter := 1
-	fileName := baseName + ext
-
-	for {
-		filePath := filepath.Join(dir, fileName)
-		_, err := os.Stat(filePath)
-		if os.IsNotExist(err) {
-			return fileName
-		}
-
-		counter++
-		fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext)
-	}
-}
diff --git a/core/cli/run.go b/core/cli/run.go
index cafc0b54..0f3ba2de 100644
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -124,11 +124,11 @@ func (r *RunCMD) Run(ctx *Context) error {
 	}
 
 	if r.PreloadBackendOnly {
-		_, err := startup.Startup(opts...)
+		_, _, _, err := startup.Startup(opts...)
 		return err
 	}
 
-	application, err := startup.Startup(opts...)
+	cl, ml, options, err := startup.Startup(opts...)
 
 	if err != nil {
 		return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
@@ -137,7 +137,7 @@ func (r *RunCMD) Run(ctx *Context) error {
 	// Watch the configuration directory
 	// If the directory does not exist, we don't watch it
 	if _, err := os.Stat(r.LocalaiConfigDir); err == nil {
-		closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, application.ApplicationConfig)
+		closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, options)
 		defer closeConfigWatcherFn()
 
 		if err != nil {
@@ -145,7 +145,7 @@ func (r *RunCMD) Run(ctx *Context) error {
 		}
 	}
 
-	appHTTP, err := http.App(application)
+	appHTTP, err := http.App(cl, ml, options)
 	if err != nil {
 		log.Error().Err(err).Msg("error during HTTP App construction")
 		return err
diff --git a/core/cli/transcript.go b/core/cli/transcript.go
index f14a1a87..9f36a77c 100644
--- a/core/cli/transcript.go
+++ b/core/cli/transcript.go
@@ -7,7 +7,6 @@ import (
 
 	"github.com/go-skynet/LocalAI/core/backend"
 	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/go-skynet/LocalAI/pkg/model"
 )
 
@@ -44,21 +43,11 @@ func (t *TranscriptCMD) Run(ctx *Context) error {
 
 	defer ml.StopAllGRPC()
 
-	tbs := backend.NewTranscriptionBackendService(ml, cl, opts)
-
-	resultChannel := tbs.Transcribe(&schema.OpenAIRequest{
-		PredictionOptions: schema.PredictionOptions{
-			Language: t.Language,
-		},
-		File: t.Filename,
-	})
-
-	r := <-resultChannel
-
-	if r.Error != nil {
-		return r.Error
+	tr, err := backend.ModelTranscription(t.Filename, t.Language, ml, c, opts)
+	if err != nil {
+		return err
 	}
-	for _, segment := range r.Value.Segments {
+	for _, segment := range tr.Segments {
 		fmt.Println(segment.Start.String(), "-", segment.Text)
 	}
 	return nil
diff --git a/core/cli/tts.go b/core/cli/tts.go
index c7758c48..1d8fd3a3 100644
--- a/core/cli/tts.go
+++ b/core/cli/tts.go
@@ -9,7 +9,6 @@ import (
 
 	"github.com/go-skynet/LocalAI/core/backend"
 	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/go-skynet/LocalAI/pkg/model"
 )
 
@@ -43,29 +42,20 @@ func (t *TTSCMD) Run(ctx *Context) error {
 
 	defer ml.StopAllGRPC()
 
-	ttsbs := backend.NewTextToSpeechBackendService(ml, config.NewBackendConfigLoader(), opts)
+	options := config.BackendConfig{}
+	options.SetDefaults()
 
-	request := &schema.TTSRequest{
-		Model:   t.Model,
-		Input:   text,
-		Backend: t.Backend,
-		Voice:   t.Voice,
-	}
-
-	resultsChannel := ttsbs.TextToAudioFile(request)
-
-	rawResult := <-resultsChannel
-
-	if rawResult.Error != nil {
-		return rawResult.Error
+	filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, ml, opts, options)
+	if err != nil {
+		return err
 	}
 	if outputFile != "" {
-		if err := os.Rename(*rawResult.Value, outputFile); err != nil {
+		if err := os.Rename(filePath, outputFile); err != nil {
 			return err
 		}
-		fmt.Printf("Generated file %q\n", outputFile)
+		fmt.Printf("Generate file %s\n", outputFile)
 	} else {
-		fmt.Printf("Generated file %q\n", *rawResult.Value)
+		fmt.Printf("Generate file %s\n", filePath)
 	}
 	return nil
 }
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index 47e4829d..81c92d01 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -1,7 +1,22 @@
 package config
 
 import (
+	"errors"
+	"fmt"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+	"sync"
+
 	"github.com/go-skynet/LocalAI/core/schema"
+	"github.com/go-skynet/LocalAI/pkg/downloader"
+	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/rs/zerolog/log"
+	"gopkg.in/yaml.v3"
+
+	"github.com/charmbracelet/glamour"
 )
 
 const (
@@ -184,7 +199,7 @@ func (c *BackendConfig) FunctionToCall() string {
 }
 
 func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
-	lo := &ConfigLoaderOptions{}
+	lo := &LoadOptions{}
 	lo.Apply(opts...)
 
 	ctx := lo.ctxSize
@@ -297,3 +312,287 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 		cfg.Debug = &trueV
 	}
 }
+
+////// Config Loader ////////
+
+type BackendConfigLoader struct {
+	configs map[string]BackendConfig
+	sync.Mutex
+}
+
+type LoadOptions struct {
+	debug            bool
+	threads, ctxSize int
+	f16              bool
+}
+
+func LoadOptionDebug(debug bool) ConfigLoaderOption {
+	return func(o *LoadOptions) {
+		o.debug = debug
+	}
+}
+
+func LoadOptionThreads(threads int) ConfigLoaderOption {
+	return func(o *LoadOptions) {
+		o.threads = threads
+	}
+}
+
+func LoadOptionContextSize(ctxSize int) ConfigLoaderOption {
+	return func(o *LoadOptions) {
+		o.ctxSize = ctxSize
+	}
+}
+
+func LoadOptionF16(f16 bool) ConfigLoaderOption {
+	return func(o *LoadOptions) {
+		o.f16 = f16
+	}
+}
+
+type ConfigLoaderOption func(*LoadOptions)
+
+func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) {
+	for _, l := range options {
+		l(lo)
+	}
+}
+
+// Load a config file for a model
+func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
+
+	// Load a config file if present after the model name
+	cfg := &BackendConfig{
+		PredictionOptions: schema.PredictionOptions{
+			Model: modelName,
+		},
+	}
+
+	cfgExisting, exists := cl.GetBackendConfig(modelName)
+	if exists {
+		cfg = &cfgExisting
+	} else {
+		// Try loading a model config file
+		modelConfig := filepath.Join(modelPath, modelName+".yaml")
+		if _, err := os.Stat(modelConfig); err == nil {
+			if err := cl.LoadBackendConfig(
+				modelConfig, opts...,
+			); err != nil {
+				return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
+			}
+			cfgExisting, exists = cl.GetBackendConfig(modelName)
+			if exists {
+				cfg = &cfgExisting
+			}
+		}
+	}
+
+	cfg.SetDefaults(opts...)
+
+	return cfg, nil
+}
+
+func NewBackendConfigLoader() *BackendConfigLoader {
+	return &BackendConfigLoader{
+		configs: make(map[string]BackendConfig),
+	}
+}
+func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) {
+	c := &[]*BackendConfig{}
+	f, err := os.ReadFile(file)
+	if err != nil {
+		return nil, fmt.Errorf("cannot read config file: %w", err)
+	}
+	if err := yaml.Unmarshal(f, c); err != nil {
+		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
+	}
+
+	for _, cc := range *c {
+		cc.SetDefaults(opts...)
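Note that ReadBackendConfigFile above unmarshals the whole file into a slice of configs, so a single YAML file can declare several models at once. A standalone sketch of that shape follows; the trimmed-down struct fields and YAML tags are illustrative stand-ins for the real BackendConfig, not its actual definition:

package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

// Stand-ins for BackendConfig / PredictionOptions, just to show why the
// loader unmarshals into a *slice*: one file, many model definitions.
type predictionOptions struct {
	Model string `yaml:"model"`
}

type backendConfig struct {
	Name       string            `yaml:"name"`
	Parameters predictionOptions `yaml:"parameters"`
}

func main() {
	data := []byte(`
- name: gpt-3.5-turbo
  parameters:
    model: luna-ai-llama2
- name: whisper-1
  parameters:
    model: ggml-whisper-base.bin
`)
	var configs []*backendConfig
	if err := yaml.Unmarshal(data, &configs); err != nil {
		panic(err)
	}
	for _, c := range configs {
		// the real loader indexes each entry by Name and applies SetDefaults
		fmt.Printf("%s -> %s\n", c.Name, c.Parameters.Model)
	}
}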
+	}
+
+	return *c, nil
+}
+
+func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
+	lo := &LoadOptions{}
+	lo.Apply(opts...)
+
+	c := &BackendConfig{}
+	f, err := os.ReadFile(file)
+	if err != nil {
+		return nil, fmt.Errorf("cannot read config file: %w", err)
+	}
+	if err := yaml.Unmarshal(f, c); err != nil {
+		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
+	}
+
+	c.SetDefaults(opts...)
+	return c, nil
+}
+
+func (cm *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error {
+	cm.Lock()
+	defer cm.Unlock()
+	c, err := ReadBackendConfigFile(file, opts...)
+	if err != nil {
+		return fmt.Errorf("cannot load config file: %w", err)
+	}
+
+	for _, cc := range c {
+		cm.configs[cc.Name] = *cc
+	}
+	return nil
+}
+
+func (cl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error {
+	cl.Lock()
+	defer cl.Unlock()
+	c, err := ReadBackendConfig(file, opts...)
+	if err != nil {
+		return fmt.Errorf("cannot read config file: %w", err)
+	}
+
+	cl.configs[c.Name] = *c
+	return nil
+}
+
+func (cl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) {
+	cl.Lock()
+	defer cl.Unlock()
+	v, exists := cl.configs[m]
+	return v, exists
+}
+
+func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
+	cl.Lock()
+	defer cl.Unlock()
+	var res []BackendConfig
+	for _, v := range cl.configs {
+		res = append(res, v)
+	}
+
+	sort.SliceStable(res, func(i, j int) bool {
+		return res[i].Name < res[j].Name
+	})
+
+	return res
+}
+
+func (cl *BackendConfigLoader) ListBackendConfigs() []string {
+	cl.Lock()
+	defer cl.Unlock()
+	var res []string
+	for k := range cl.configs {
+		res = append(res, k)
+	}
+	return res
+}
+
+// Preload prepare models if they are not local but url or huggingface repositories
+func (cl *BackendConfigLoader) Preload(modelPath string) error {
+	cl.Lock()
+	defer cl.Unlock()
+
+	status := func(fileName, current, total string, percent float64) {
+		utils.DisplayDownloadFunction(fileName, current, total, percent)
+	}
+
+	log.Info().Msgf("Preloading models from %s", modelPath)
+
+	renderMode := "dark"
+	if os.Getenv("COLOR") != "" {
+		renderMode = os.Getenv("COLOR")
+	}
+
+	glamText := func(t string) {
+		out, err := glamour.Render(t, renderMode)
+		if err == nil && os.Getenv("NO_COLOR") == "" {
+			fmt.Println(out)
+		} else {
+			fmt.Println(t)
+		}
+	}
+
+	for i, config := range cl.configs {
+
+		// Download files and verify their SHA
+		for _, file := range config.DownloadFiles {
+			log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
+
+			if err := utils.VerifyPath(file.Filename, modelPath); err != nil {
+				return err
+			}
+			// Create file path
+			filePath := filepath.Join(modelPath, file.Filename)
+
+			if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
+				return err
+			}
+		}
+
+		modelURL := config.PredictionOptions.Model
+		modelURL = downloader.ConvertURL(modelURL)
+
+		if downloader.LooksLikeURL(modelURL) {
+			// md5 of model name
+			md5Name := utils.MD5(modelURL)
+
+			// check if file exists
+			if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
+				err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
+				if err != nil {
+					return err
+				}
+			}
+
+			cc := cl.configs[i]
+			c := &cc
+			c.PredictionOptions.Model = md5Name
+			cl.configs[i] = *c
+		}
+		if cl.configs[i].Name != "" {
+			glamText(fmt.Sprintf("**Model name**: _%s_", cl.configs[i].Name))
+		}
+		if cl.configs[i].Description != "" {
+			//glamText("**Description**")
+			glamText(cl.configs[i].Description)
+		}
+		if cl.configs[i].Usage != "" {
+			//glamText("**Usage**")
+			glamText(cl.configs[i].Usage)
+		}
+	}
+	return nil
+}
+
+// LoadBackendConfigsFromPath reads all the configurations of the models from a path
+// (non-recursive)
+func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
+	cm.Lock()
+	defer cm.Unlock()
+	entries, err := os.ReadDir(path)
+	if err != nil {
+		return err
+	}
+	files := make([]fs.FileInfo, 0, len(entries))
+	for _, entry := range entries {
+		info, err := entry.Info()
+		if err != nil {
+			return err
+		}
+		files = append(files, info)
+	}
+	for _, file := range files {
+		// Skip templates, YAML and .keep files
+		if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") {
+			continue
+		}
+		c, err := ReadBackendConfig(filepath.Join(path, file.Name()), opts...)
+		if err == nil {
+			cm.configs[c.Name] = *c
+		}
+	}
+
+	return nil
+}
diff --git a/core/config/backend_config_loader.go b/core/config/backend_config_loader.go
deleted file mode 100644
index 62dfc1e0..00000000
--- a/core/config/backend_config_loader.go
+++ /dev/null
@@ -1,509 +0,0 @@
-package config
-
-import (
-	"encoding/json"
-	"errors"
-	"fmt"
-	"io/fs"
-	"os"
-	"path/filepath"
-	"sort"
-	"strings"
-	"sync"
-
-	"github.com/charmbracelet/glamour"
-	"github.com/go-skynet/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/pkg/downloader"
-	"github.com/go-skynet/LocalAI/pkg/grammar"
-	"github.com/go-skynet/LocalAI/pkg/utils"
-	"github.com/rs/zerolog/log"
-	"gopkg.in/yaml.v2"
-)
-
-type BackendConfigLoader struct {
-	configs map[string]BackendConfig
-	sync.Mutex
-}
-
-type ConfigLoaderOptions struct {
-	debug            bool
-	threads, ctxSize int
-	f16              bool
-}
-
-func LoadOptionDebug(debug bool) ConfigLoaderOption {
-	return func(o *ConfigLoaderOptions) {
-		o.debug = debug
-	}
-}
-
-func LoadOptionThreads(threads int) ConfigLoaderOption {
-	return func(o *ConfigLoaderOptions) {
-		o.threads = threads
-	}
-}
-
-func LoadOptionContextSize(ctxSize int) ConfigLoaderOption {
-	return func(o *ConfigLoaderOptions) {
-		o.ctxSize = ctxSize
-	}
-}
-
-func LoadOptionF16(f16 bool) ConfigLoaderOption {
-	return func(o *ConfigLoaderOptions) {
-		o.f16 = f16
-	}
-}
-
-type ConfigLoaderOption func(*ConfigLoaderOptions)
-
-func (lo *ConfigLoaderOptions) Apply(options ...ConfigLoaderOption) {
-	for _, l := range options {
-		l(lo)
-	}
-}
-
-func NewBackendConfigLoader() *BackendConfigLoader {
-	return &BackendConfigLoader{
-		configs: make(map[string]BackendConfig),
-	}
-}
-
-func (bcl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error {
-	bcl.Lock()
-	defer bcl.Unlock()
-	c, err := readBackendConfig(file, opts...)
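A note on the URL branch in Preload above: a remote model is stored under a filename derived from its URL via utils.MD5, so later startups can cheaply os.Stat the cache entry before re-downloading. Assuming utils.MD5 is a hex-encoded MD5 digest (an assumption; its implementation is outside this patch), the naming scheme works roughly like this:

package main

import (
	"crypto/md5"
	"encoding/hex"
	"fmt"
	"path/filepath"
)

// md5Hex mirrors what utils.MD5 is assumed to do: hex-encode the MD5 of s.
func md5Hex(s string) string {
	sum := md5.Sum([]byte(s))
	return hex.EncodeToString(sum[:])
}

func main() {
	modelURL := "https://example.com/models/ggml-model.bin" // hypothetical URL
	cached := filepath.Join("/models", md5Hex(modelURL))
	fmt.Println(cached) // the download target checked with os.Stat before fetching
}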
-	if err != nil {
-		return fmt.Errorf("cannot read config file: %w", err)
-	}
-
-	bcl.configs[c.Name] = *c
-	return nil
-}
-
-func (bcl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) {
-	bcl.Lock()
-	defer bcl.Unlock()
-	v, exists := bcl.configs[m]
-	return v, exists
-}
-
-func (bcl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
-	bcl.Lock()
-	defer bcl.Unlock()
-	var res []BackendConfig
-	for _, v := range bcl.configs {
-		res = append(res, v)
-	}
-	sort.SliceStable(res, func(i, j int) bool {
-		return res[i].Name < res[j].Name
-	})
-	return res
-}
-
-func (bcl *BackendConfigLoader) ListBackendConfigs() []string {
-	bcl.Lock()
-	defer bcl.Unlock()
-	var res []string
-	for k := range bcl.configs {
-		res = append(res, k)
-	}
-	return res
-}
-
-// Preload prepare models if they are not local but url or huggingface repositories
-func (bcl *BackendConfigLoader) Preload(modelPath string) error {
-	bcl.Lock()
-	defer bcl.Unlock()
-
-	status := func(fileName, current, total string, percent float64) {
-		utils.DisplayDownloadFunction(fileName, current, total, percent)
-	}
-
-	log.Info().Msgf("Preloading models from %s", modelPath)
-
-	renderMode := "dark"
-	if os.Getenv("COLOR") != "" {
-		renderMode = os.Getenv("COLOR")
-	}
-
-	glamText := func(t string) {
-		out, err := glamour.Render(t, renderMode)
-		if err == nil && os.Getenv("NO_COLOR") == "" {
-			fmt.Println(out)
-		} else {
-			fmt.Println(t)
-		}
-	}
-
-	for i, config := range bcl.configs {
-
-		// Download files and verify their SHA
-		for _, file := range config.DownloadFiles {
-			log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
-
-			if err := utils.VerifyPath(file.Filename, modelPath); err != nil {
-				return err
-			}
-			// Create file path
-			filePath := filepath.Join(modelPath, file.Filename)
-
-			if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
-				return err
-			}
-		}
-
-		modelURL := config.PredictionOptions.Model
-		modelURL = downloader.ConvertURL(modelURL)
-
-		if downloader.LooksLikeURL(modelURL) {
-			// md5 of model name
-			md5Name := utils.MD5(modelURL)
-
-			// check if file exists
-			if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
-				err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
-				if err != nil {
-					return err
-				}
-			}
-
-			cc := bcl.configs[i]
-			c := &cc
-			c.PredictionOptions.Model = md5Name
-			bcl.configs[i] = *c
-		}
-		if bcl.configs[i].Name != "" {
-			glamText(fmt.Sprintf("**Model name**: _%s_", bcl.configs[i].Name))
-		}
-		if bcl.configs[i].Description != "" {
-			//glamText("**Description**")
-			glamText(bcl.configs[i].Description)
-		}
-		if bcl.configs[i].Usage != "" {
-			//glamText("**Usage**")
-			glamText(bcl.configs[i].Usage)
-		}
-	}
-	return nil
-}
-
-func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
-	bcl.Lock()
-	defer bcl.Unlock()
-	entries, err := os.ReadDir(path)
-	if err != nil {
-		return err
-	}
-	files := make([]fs.FileInfo, 0, len(entries))
-	for _, entry := range entries {
-		info, err := entry.Info()
-		if err != nil {
-			return err
-		}
-		files = append(files, info)
-	}
-	for _, file := range files {
-		// Skip templates, YAML and .keep files
-		if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") {
-			continue
-		}
-		c, err := readBackendConfig(filepath.Join(path, file.Name()), opts...)
-		if err == nil {
-			bcl.configs[c.Name] = *c
-		}
-	}
-
-	return nil
-}
-
-func (bcl *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error {
-	bcl.Lock()
-	defer bcl.Unlock()
-	c, err := readBackendConfigFile(file, opts...)
-	if err != nil {
-		return fmt.Errorf("cannot load config file: %w", err)
-	}
-
-	for _, cc := range c {
-		bcl.configs[cc.Name] = *cc
-	}
-	return nil
-}
-
-//////////
-
-// Load a config file for a model
-func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName string, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
-
-	// Load a config file if present after the model name
-	cfg := &BackendConfig{
-		PredictionOptions: schema.PredictionOptions{
-			Model: modelName,
-		},
-	}
-
-	cfgExisting, exists := bcl.GetBackendConfig(modelName)
-	if exists {
-		cfg = &cfgExisting
-	} else {
-		// Load a config file if present after the model name
-		modelConfig := filepath.Join(modelPath, modelName+".yaml")
-		if _, err := os.Stat(modelConfig); err == nil {
-			if err := bcl.LoadBackendConfig(modelConfig); err != nil {
-				return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
-			}
-			cfgExisting, exists = bcl.GetBackendConfig(modelName)
-			if exists {
-				cfg = &cfgExisting
-			}
-		}
-	}
-
-	cfg.SetDefaults(opts...)
-	return cfg, nil
-}
-
-func readBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) {
-	c := &[]*BackendConfig{}
-	f, err := os.ReadFile(file)
-	if err != nil {
-		return nil, fmt.Errorf("cannot read config file: %w", err)
-	}
-	if err := yaml.Unmarshal(f, c); err != nil {
-		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
-	}
-
-	for _, cc := range *c {
-		cc.SetDefaults(opts...)
-	}
-
-	return *c, nil
-}
-
-func readBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
-	c := &BackendConfig{}
-	f, err := os.ReadFile(file)
-	if err != nil {
-		return nil, fmt.Errorf("cannot read config file: %w", err)
-	}
-	if err := yaml.Unmarshal(f, c); err != nil {
-		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
-	}
-
-	c.SetDefaults(opts...)
-	return c, nil
-}
-
-func (bcl *BackendConfigLoader) LoadBackendConfigForModelAndOpenAIRequest(modelFile string, input *schema.OpenAIRequest, appConfig *ApplicationConfig) (*BackendConfig, *schema.OpenAIRequest, error) {
-	cfg, err := bcl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
-		LoadOptionContextSize(appConfig.ContextSize),
-		LoadOptionDebug(appConfig.Debug),
-		LoadOptionF16(appConfig.F16),
-		LoadOptionThreads(appConfig.Threads),
-	)
-
-	// Set the parameters for the language model prediction
-	updateBackendConfigFromOpenAIRequest(cfg, input)
-
-	return cfg, input, err
-}
-
-func updateBackendConfigFromOpenAIRequest(bc *BackendConfig, request *schema.OpenAIRequest) {
-	if request.Echo {
-		bc.Echo = request.Echo
-	}
-	if request.TopK != nil && *request.TopK != 0 {
-		bc.TopK = request.TopK
-	}
-	if request.TopP != nil && *request.TopP != 0 {
-		bc.TopP = request.TopP
-	}
-
-	if request.Backend != "" {
-		bc.Backend = request.Backend
-	}
-
-	if request.ClipSkip != 0 {
-		bc.Diffusers.ClipSkip = request.ClipSkip
-	}
-
-	if request.ModelBaseName != "" {
-		bc.AutoGPTQ.ModelBaseName = request.ModelBaseName
-	}
-
-	if request.NegativePromptScale != 0 {
-		bc.NegativePromptScale = request.NegativePromptScale
-	}
-
-	if request.UseFastTokenizer {
-		bc.UseFastTokenizer = request.UseFastTokenizer
-	}
-
-	if request.NegativePrompt != "" {
-		bc.NegativePrompt = request.NegativePrompt
-	}
-
-	if request.RopeFreqBase != 0 {
-		bc.RopeFreqBase = request.RopeFreqBase
-	}
-
-	if request.RopeFreqScale != 0 {
-		bc.RopeFreqScale = request.RopeFreqScale
-	}
-
-	if request.Grammar != "" {
-		bc.Grammar = request.Grammar
-	}
-
-	if request.Temperature != nil && *request.Temperature != 0 {
-		bc.Temperature = request.Temperature
-	}
-
-	if request.Maxtokens != nil && *request.Maxtokens != 0 {
-		bc.Maxtokens = request.Maxtokens
-	}
-
-	switch stop := request.Stop.(type) {
-	case string:
-		if stop != "" {
-			bc.StopWords = append(bc.StopWords, stop)
-		}
-	case []interface{}:
-		for _, pp := range stop {
-			if s, ok := pp.(string); ok {
-				bc.StopWords = append(bc.StopWords, s)
-			}
-		}
-	}
-
-	if len(request.Tools) > 0 {
-		for _, tool := range request.Tools {
-			request.Functions = append(request.Functions, tool.Function)
-		}
-	}
-
-	if request.ToolsChoice != nil {
-		var toolChoice grammar.Tool
-		switch content := request.ToolsChoice.(type) {
-		case string:
-			_ = json.Unmarshal([]byte(content), &toolChoice)
-		case map[string]interface{}:
-			dat, _ := json.Marshal(content)
-			_ = json.Unmarshal(dat, &toolChoice)
-		}
-		request.FunctionCall = map[string]interface{}{
-			"name": toolChoice.Function.Name,
-		}
-	}
-
-	// Decode each request's message content
-	index := 0
-	for i, m := range request.Messages {
-		switch content := m.Content.(type) {
-		case string:
-			request.Messages[i].StringContent = content
-		case []interface{}:
-			dat, _ := json.Marshal(content)
-			c := []schema.Content{}
-			json.Unmarshal(dat, &c)
-			for _, pp := range c {
-				if pp.Type == "text" {
-					request.Messages[i].StringContent = pp.Text
-				} else if pp.Type == "image_url" {
-					// Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64:
-					base64, err := utils.GetImageURLAsBase64(pp.ImageURL.URL)
-					if err == nil {
-						request.Messages[i].StringImages = append(request.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
-						// set a placeholder for each image
-						request.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + request.Messages[i].StringContent
-						index++
-					} else {
-						fmt.Print("Failed encoding image", err)
-					}
-				}
-			}
-		}
-	}
-
-	if request.RepeatPenalty != 0 {
-		bc.RepeatPenalty = request.RepeatPenalty
-	}
-
-	if request.FrequencyPenalty != 0 {
-		bc.FrequencyPenalty = request.FrequencyPenalty
-	}
-
-	if request.PresencePenalty != 0 {
-		bc.PresencePenalty = request.PresencePenalty
-	}
-
-	if request.Keep != 0 {
-		bc.Keep = request.Keep
-	}
-
-	if request.Batch != 0 {
-		bc.Batch = request.Batch
-	}
-
-	if request.IgnoreEOS {
-		bc.IgnoreEOS = request.IgnoreEOS
-	}
-
-	if request.Seed != nil {
-		bc.Seed = request.Seed
-	}
-
-	if request.TypicalP != nil {
-		bc.TypicalP = request.TypicalP
-	}
-
-	switch inputs := request.Input.(type) {
-	case string:
-		if inputs != "" {
-			bc.InputStrings = append(bc.InputStrings, inputs)
-		}
-	case []interface{}:
-		for _, pp := range inputs {
-			switch i := pp.(type) {
-			case string:
-				bc.InputStrings = append(bc.InputStrings, i)
-			case []interface{}:
-				tokens := []int{}
-				for _, ii := range i {
-					tokens = append(tokens, int(ii.(float64)))
-				}
-				bc.InputToken = append(bc.InputToken, tokens)
-			}
-		}
-	}
-
-	// Can be either a string or an object
-	switch fnc := request.FunctionCall.(type) {
-	case string:
-		if fnc != "" {
-			bc.SetFunctionCallString(fnc)
-		}
-	case map[string]interface{}:
-		var name string
-		n, exists := fnc["name"]
-		if exists {
-			nn, e := n.(string)
-			if e {
-				name = nn
-			}
-		}
-		bc.SetFunctionCallNameString(name)
-	}
-
-	switch p := request.Prompt.(type) {
-	case string:
-		bc.PromptStrings = append(bc.PromptStrings, p)
-	case []interface{}:
-		for _, pp := range p {
-			if s, ok := pp.(string); ok {
-				bc.PromptStrings = append(bc.PromptStrings, s)
-			}
-		}
-	}
-}
diff --git a/core/config/exports_test.go b/core/config/exports_test.go
deleted file mode 100644
index 70ba84e6..00000000
--- a/core/config/exports_test.go
+++ /dev/null
@@ -1,6 +0,0 @@
-package config
-
-// This file re-exports private functions to be used directly in unit tests.
-// Since this file's name ends in _test.go, theoretically these should not be exposed past the tests.
-
-var ReadBackendConfigFile = readBackendConfigFile
diff --git a/core/http/api.go b/core/http/api.go
index 7094899a..af38512a 100644
--- a/core/http/api.go
+++ b/core/http/api.go
@@ -1,20 +1,23 @@
 package http
 
 import (
+	"encoding/json"
 	"errors"
+	"os"
 	"strings"
 
-	"github.com/go-skynet/LocalAI/core"
-	fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
+	"github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/gofiber/swagger" // swagger handler
 
 	"github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs"
 	"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
 	"github.com/go-skynet/LocalAI/core/http/endpoints/openai"
+
+	"github.com/go-skynet/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/go-skynet/LocalAI/core/services"
 	"github.com/go-skynet/LocalAI/internal"
-	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/go-skynet/LocalAI/pkg/model"
 
 	"github.com/gofiber/fiber/v2"
 	"github.com/gofiber/fiber/v2/middleware/cors"
@@ -52,12 +55,13 @@ func readAuthHeader(c *fiber.Ctx) string {
 // @securityDefinitions.apikey BearerAuth
 // @in header
 // @name Authorization
-func App(application *core.Application) (*fiber.App, error) {
+
+func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
 	// Return errors as JSON responses
 	app := fiber.New(fiber.Config{
 		Views:                 renderEngine(),
-		BodyLimit:             application.ApplicationConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
-		DisableStartupMessage: application.ApplicationConfig.DisableMessage,
+		BodyLimit:             appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
+		DisableStartupMessage: appConfig.DisableMessage,
 		// Override default error handler
 		ErrorHandler: func(ctx *fiber.Ctx, err error) error {
 			// Status code defaults to 500
@@ -78,7 +82,7 @@ func App(application *core.Application) (*fiber.App, error) {
 		},
 	})
 
-	if application.ApplicationConfig.Debug {
+	if appConfig.Debug {
 		app.Use(logger.New(logger.Config{
 			Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
 		}))
@@ -86,7 +90,7 @@ func App(application *core.Application) (*fiber.App, error) {
 
 	// Default middleware config
 
-	if !application.ApplicationConfig.Debug {
+	if !appConfig.Debug {
 		app.Use(recover.New())
 	}
 
@@ -104,7 +108,25 @@ func App(application *core.Application) (*fiber.App, error) {
 	// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
 	auth := func(c *fiber.Ctx) error {
-		if len(application.ApplicationConfig.ApiKeys) == 0 {
+		if len(appConfig.ApiKeys) == 0 {
+			return c.Next()
+		}
+
+		// Check for api_keys.json file
+		fileContent, err := os.ReadFile("api_keys.json")
+		if err == nil {
+			// Parse JSON content from the file
+			var fileKeys []string
+			err := json.Unmarshal(fileContent, &fileKeys)
+			if err != nil {
+				return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"})
+			}
+
+			// Add file keys to options.ApiKeys
+			appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...)
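Since the auth middleware above unmarshals the file into a []string, api_keys.json is expected to contain a flat JSON array of bearer keys, for example (hypothetical values):

["sk-key-one", "sk-key-two"]

Keys read from the file are appended to the keys configured at startup, and a request is accepted when its Authorization bearer token matches any entry in the combined list.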
+ } + + if len(appConfig.ApiKeys) == 0 { return c.Next() } @@ -120,7 +142,7 @@ func App(application *core.Application) (*fiber.App, error) { } apiKey := authHeaderParts[1] - for _, key := range application.ApplicationConfig.ApiKeys { + for _, key := range appConfig.ApiKeys { if apiKey == key { return c.Next() } @@ -129,22 +151,20 @@ func App(application *core.Application) (*fiber.App, error) { return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"}) } - if application.ApplicationConfig.CORS { + if appConfig.CORS { var c func(ctx *fiber.Ctx) error - if application.ApplicationConfig.CORSAllowOrigins == "" { + if appConfig.CORSAllowOrigins == "" { c = cors.New() } else { - c = cors.New(cors.Config{AllowOrigins: application.ApplicationConfig.CORSAllowOrigins}) + c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins}) } app.Use(c) } - fiberContextExtractor := fiberContext.NewFiberContextExtractor(application.ModelLoader, application.ApplicationConfig) - // LocalAI API endpoints - galleryService := services.NewGalleryService(application.ApplicationConfig.ModelPath) - galleryService.Start(application.ApplicationConfig.Context, application.BackendConfigLoader) + galleryService := services.NewGalleryService(appConfig.ModelPath) + galleryService.Start(appConfig.Context, cl) app.Get("/version", auth, func(c *fiber.Ctx) error { return c.JSON(struct { @@ -152,17 +172,29 @@ func App(application *core.Application) (*fiber.App, error) { }{Version: internal.PrintableVersion()}) }) + // Make sure directories exists + os.MkdirAll(appConfig.ImageDir, 0755) + os.MkdirAll(appConfig.AudioDir, 0755) + os.MkdirAll(appConfig.UploadDir, 0755) + os.MkdirAll(appConfig.ConfigsDir, 0755) + os.MkdirAll(appConfig.ModelPath, 0755) + + // Load config jsons + utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles) + utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) + utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles) + app.Get("/swagger/*", swagger.HandlerDefault) // default welcomeRoute( app, - application.BackendConfigLoader, - application.ModelLoader, - application.ApplicationConfig, + cl, + ml, + appConfig, auth, ) - modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(application.ApplicationConfig.Galleries, application.ApplicationConfig.ModelPath, galleryService) + modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint()) app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint()) app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint()) @@ -171,85 +203,83 @@ func App(application *core.Application) (*fiber.App, error) { app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint()) app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint()) - // Stores - storeLoader := model.NewModelLoader("") // TODO: Investigate if this should be migrated to application and reused. Should the path be configurable? Merging for now. 
- app.Post("/stores/set", auth, localai.StoresSetEndpoint(storeLoader, application.ApplicationConfig)) - app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(storeLoader, application.ApplicationConfig)) - app.Post("/stores/get", auth, localai.StoresGetEndpoint(storeLoader, application.ApplicationConfig)) - app.Post("/stores/find", auth, localai.StoresFindEndpoint(storeLoader, application.ApplicationConfig)) - - // openAI compatible API endpoints - - // chat - app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(fiberContextExtractor, application.OpenAIService)) - app.Post("/chat/completions", auth, openai.ChatEndpoint(fiberContextExtractor, application.OpenAIService)) - - // edit - app.Post("/v1/edits", auth, openai.EditEndpoint(fiberContextExtractor, application.OpenAIService)) - app.Post("/edits", auth, openai.EditEndpoint(fiberContextExtractor, application.OpenAIService)) - - // assistant - // TODO: Refactor this to the new style eventually - app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Get("/assistants", auth, openai.ListAssistantsEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Post("/assistants", auth, openai.CreateAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Delete("/assistants/:assistant_id/files/:file_id", 
auth, openai.DeleteAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - - // files - app.Post("/v1/files", auth, openai.UploadFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - app.Post("/files", auth, openai.UploadFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - app.Get("/v1/files", auth, openai.ListFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - app.Get("/files", auth, openai.ListFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - - // completion - app.Post("/v1/completions", auth, openai.CompletionEndpoint(fiberContextExtractor, application.OpenAIService)) - app.Post("/completions", auth, openai.CompletionEndpoint(fiberContextExtractor, application.OpenAIService)) - app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(fiberContextExtractor, application.OpenAIService)) - - // embeddings - app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(fiberContextExtractor, application.EmbeddingsBackendService)) - app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(fiberContextExtractor, application.EmbeddingsBackendService)) - app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(fiberContextExtractor, application.EmbeddingsBackendService)) - - // audio - app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(fiberContextExtractor, application.TranscriptionBackendService)) - app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(fiberContextExtractor, application.TextToSpeechBackendService)) - - // images - app.Post("/v1/images/generations", auth, openai.ImageEndpoint(fiberContextExtractor, application.ImageGenerationBackendService)) + app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig)) // Elevenlabs - app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(fiberContextExtractor, application.TextToSpeechBackendService)) + app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig)) - // LocalAI TTS? 
- app.Post("/tts", auth, localai.TTSEndpoint(fiberContextExtractor, application.TextToSpeechBackendService)) + // Stores + sl := model.NewModelLoader("") + app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig)) + app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig)) + app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig)) + app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig)) - if application.ApplicationConfig.ImageDir != "" { - app.Static("/generated-images", application.ApplicationConfig.ImageDir) + // openAI compatible API endpoint + + // chat + app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig)) + app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig)) + + // edit + app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) + app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) + + // assistant + app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig)) + app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig)) + app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig)) + app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig)) + app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig)) + app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig)) + app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig)) + app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) + app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) + app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) + app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) + app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig)) + + // files + app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) + app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) + app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig)) + app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig)) + app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig)) + app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig)) + app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig)) + app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig)) + app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig)) + app.Get("/files/:file_id/content", auth, 
openai.GetFilesContentsEndpoint(cl, appConfig)) + + // completion + app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) + app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) + app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) + + // embeddings + app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) + app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) + app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) + + // audio + app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig)) + app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig)) + + // images + app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig)) + + if appConfig.ImageDir != "" { + app.Static("/generated-images", appConfig.ImageDir) } - if application.ApplicationConfig.AudioDir != "" { - app.Static("/generated-audio", application.ApplicationConfig.AudioDir) + if appConfig.AudioDir != "" { + app.Static("/generated-audio", appConfig.AudioDir) } ok := func(c *fiber.Ctx) error { @@ -261,12 +291,13 @@ func App(application *core.Application) (*fiber.App, error) { app.Get("/readyz", ok) // Experimental Backend Statistics Module - app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(application.BackendMonitorService)) - app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(application.BackendMonitorService)) + backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now + app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor)) + app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor)) // models - app.Get("/v1/models", auth, openai.ListModelsEndpoint(application.ListModelsService)) - app.Get("/models", auth, openai.ListModelsEndpoint(application.ListModelsService)) + app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml)) + app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml)) app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint()) diff --git a/core/http/api_test.go b/core/http/api_test.go index bf8feb1c..1553ed21 100644 --- a/core/http/api_test.go +++ b/core/http/api_test.go @@ -12,9 +12,7 @@ import ( "os" "path/filepath" "runtime" - "strings" - "github.com/go-skynet/LocalAI/core" "github.com/go-skynet/LocalAI/core/config" . "github.com/go-skynet/LocalAI/core/http" "github.com/go-skynet/LocalAI/core/schema" @@ -207,7 +205,9 @@ var _ = Describe("API test", func() { var cancel context.CancelFunc var tmpdir string var modelDir string - var application *core.Application + var bcl *config.BackendConfigLoader + var ml *model.ModelLoader + var applicationConfig *config.ApplicationConfig commonOpts := []config.AppOption{ config.WithDebug(true), @@ -252,7 +252,7 @@ var _ = Describe("API test", func() { }, } - application, err = startup.Startup( + bcl, ml, applicationConfig, err = startup.Startup( append(commonOpts, config.WithContext(c), config.WithGalleries(galleries), @@ -261,7 +261,7 @@ var _ = Describe("API test", func() { config.WithBackendAssetsOutput(backendAssetsDir))...) 
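The test setup above reflects the new bootstrap contract: startup.Startup returns the backend config loader, the model loader, and the resolved application config, which are then handed to App. A minimal sketch of that sequence, assuming an opts slice of config.AppOption values and zerolog for logging (both illustrative):

    bcl, ml, appConfig, err := startup.Startup(opts...)
    if err != nil {
        log.Fatal().Err(err).Msg("startup failed")
    }
    app, err := App(bcl, ml, appConfig)
    if err != nil {
        log.Fatal().Err(err).Msg("building the HTTP app failed")
    }
    go app.Listen("127.0.0.1:9090")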
Expect(err).ToNot(HaveOccurred()) - app, err = App(application) + app, err = App(bcl, ml, applicationConfig) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -474,11 +474,11 @@ var _ = Describe("API test", func() { }) Expect(err).ToNot(HaveOccurred()) Expect(len(resp2.Choices)).To(Equal(1)) - Expect(resp2.Choices[0].Message.ToolCalls[0].Function).ToNot(BeNil()) - Expect(resp2.Choices[0].Message.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.ToolCalls[0].Function.Name) + Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil()) + Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name) var res map[string]string - err = json.Unmarshal([]byte(resp2.Choices[0].Message.ToolCalls[0].Function.Arguments), &res) + err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res) Expect(err).ToNot(HaveOccurred()) Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res)) Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) @@ -487,9 +487,9 @@ var _ = Describe("API test", func() { }) It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() { - // if runtime.GOOS != "linux" { - // Skip("test supported only on linux") - // } + if runtime.GOOS != "linux" { + Skip("test supported only on linux") + } modelName := "codellama" response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ URL: "github:go-skynet/model-gallery/codellama-7b-instruct.yaml", @@ -504,7 +504,7 @@ var _ = Describe("API test", func() { Eventually(func() bool { response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) return response["processed"].(bool) - }, "480s", "10s").Should(Equal(true)) + }, "360s", "10s").Should(Equal(true)) By("testing chat") resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: modelName, Messages: []openai.ChatCompletionMessage{ @@ -551,13 +551,11 @@ var _ = Describe("API test", func() { }) Expect(err).ToNot(HaveOccurred()) Expect(len(resp2.Choices)).To(Equal(1)) - fmt.Printf("\n--- %+v\n\n", resp2.Choices[0].Message) - Expect(resp2.Choices[0].Message.ToolCalls).ToNot(BeNil()) - Expect(resp2.Choices[0].Message.ToolCalls[0]).ToNot(BeNil()) - Expect(resp2.Choices[0].Message.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.ToolCalls[0].Function.Name) + Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil()) + Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name) var res map[string]string - err = json.Unmarshal([]byte(resp2.Choices[0].Message.ToolCalls[0].Function.Arguments), &res) + err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res) Expect(err).ToNot(HaveOccurred()) Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res)) Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) @@ -611,7 +609,7 @@ var _ = Describe("API test", func() { }, } - application, err = startup.Startup( + bcl, ml, applicationConfig, err = startup.Startup( append(commonOpts, config.WithContext(c), config.WithAudioDir(tmpdir), @@ -622,7 +620,7 @@ var _ = Describe("API test", func() { config.WithBackendAssetsOutput(tmpdir))..., ) Expect(err).ToNot(HaveOccurred()) - app, err = App(application) + app, err = App(bcl, ml, applicationConfig) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -726,14 +724,14 @@ var _ = 
Describe("API test", func() { var err error - application, err = startup.Startup( + bcl, ml, applicationConfig, err = startup.Startup( append(commonOpts, config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")), config.WithContext(c), config.WithModelPath(modelPath), )...) Expect(err).ToNot(HaveOccurred()) - app, err = App(application) + app, err = App(bcl, ml, applicationConfig) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -763,11 +761,6 @@ var _ = Describe("API test", func() { Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8? }) It("can generate completions via ggml", func() { - bt, ok := os.LookupEnv("BUILD_TYPE") - if ok && strings.ToLower(bt) == "metal" { - Skip("GGML + Metal is known flaky, skip test temporarily") - } - resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) @@ -775,11 +768,6 @@ var _ = Describe("API test", func() { }) It("can generate chat completions via ggml", func() { - bt, ok := os.LookupEnv("BUILD_TYPE") - if ok && strings.ToLower(bt) == "metal" { - Skip("GGML + Metal is known flaky, skip test temporarily") - } - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel.ggml", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) @@ -787,11 +775,6 @@ var _ = Describe("API test", func() { }) It("can generate completions from model configs", func() { - bt, ok := os.LookupEnv("BUILD_TYPE") - if ok && strings.ToLower(bt) == "metal" { - Skip("GGML + Metal is known flaky, skip test temporarily") - } - resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: testPrompt}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) @@ -799,11 +782,6 @@ var _ = Describe("API test", func() { }) It("can generate chat completions from model configs", func() { - bt, ok := os.LookupEnv("BUILD_TYPE") - if ok && strings.ToLower(bt) == "metal" { - Skip("GGML + Metal is known flaky, skip test temporarily") - } - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) @@ -890,9 +868,9 @@ var _ = Describe("API test", func() { Context("backends", func() { It("runs rwkv completion", func() { - // if runtime.GOOS != "linux" { - // Skip("test supported only on linux") - // } + if runtime.GOOS != "linux" { + Skip("test supported only on linux") + } resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,"}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices) > 0).To(BeTrue()) @@ -913,20 +891,17 @@ var _ = Describe("API test", func() { } Expect(err).ToNot(HaveOccurred()) - - if len(response.Choices) > 0 { - text += response.Choices[0].Text - tokens++ - } + text += response.Choices[0].Text + tokens++ } Expect(text).ToNot(BeEmpty()) Expect(text).To(ContainSubstring("five")) Expect(tokens).ToNot(Or(Equal(1), Equal(0))) }) It("runs rwkv chat completion", func() { - // if runtime.GOOS != "linux" { - // Skip("test 
supported only on linux") - // } + if runtime.GOOS != "linux" { + Skip("test supported only on linux") + } resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}}) Expect(err).ToNot(HaveOccurred()) @@ -1035,14 +1010,14 @@ var _ = Describe("API test", func() { c, cancel = context.WithCancel(context.Background()) var err error - application, err = startup.Startup( + bcl, ml, applicationConfig, err = startup.Startup( append(commonOpts, config.WithContext(c), config.WithModelPath(modelPath), config.WithConfigFile(os.Getenv("CONFIG_FILE")))..., ) Expect(err).ToNot(HaveOccurred()) - app, err = App(application) + app, err = App(bcl, ml, applicationConfig) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -1066,33 +1041,18 @@ var _ = Describe("API test", func() { } }) It("can generate chat completions from config file (list1)", func() { - bt, ok := os.LookupEnv("BUILD_TYPE") - if ok && strings.ToLower(bt) == "metal" { - Skip("GGML + Metal is known flaky, skip test temporarily") - } - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) }) It("can generate chat completions from config file (list2)", func() { - bt, ok := os.LookupEnv("BUILD_TYPE") - if ok && strings.ToLower(bt) == "metal" { - Skip("GGML + Metal is known flaky, skip test temporarily") - } - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) }) It("can generate edit completions from config file", func() { - bt, ok := os.LookupEnv("BUILD_TYPE") - if ok && strings.ToLower(bt) == "metal" { - Skip("GGML + Metal is known flaky, skip test temporarily") - } - request := openaigo.EditCreateRequestBody{ Model: "list2", Instruction: "foo", diff --git a/core/http/ctx/fiber.go b/core/http/ctx/fiber.go index 99fbcde9..ffb63111 100644 --- a/core/http/ctx/fiber.go +++ b/core/http/ctx/fiber.go @@ -1,88 +1,43 @@ package fiberContext import ( - "context" - "encoding/json" "fmt" "strings" - "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" ) -type FiberContextExtractor struct { - ml *model.ModelLoader - appConfig *config.ApplicationConfig -} - -func NewFiberContextExtractor(ml *model.ModelLoader, appConfig *config.ApplicationConfig) *FiberContextExtractor { - return &FiberContextExtractor{ - ml: ml, - appConfig: appConfig, - } -} - // ModelFromContext returns the model from the context // If no model is specified, it will take the first available // Takes a model string as input which should be the one received from the user request. // It returns the model name resolved from the context and an error if any. 
-func (fce *FiberContextExtractor) ModelFromContext(ctx *fiber.Ctx, modelInput string, firstModel bool) (string, error) { - ctxPM := ctx.Params("model") - if ctxPM != "" { - log.Debug().Msgf("[FCE] Overriding param modelInput %q with ctx.Params value %q", modelInput, ctxPM) - modelInput = ctxPM +func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) { + if ctx.Params("model") != "" { + modelInput = ctx.Params("model") } // Set model from bearer token, if available - bearer := strings.TrimPrefix(ctx.Get("authorization"), "Bearer ") - bearerExists := bearer != "" && fce.ml.ExistsInModelPath(bearer) + bearer := strings.TrimPrefix(ctx.Get("authorization"), "Bearer ") + bearerExists := bearer != "" && loader.ExistsInModelPath(bearer) // If no model was specified, take the first available if modelInput == "" && !bearerExists && firstModel { - models, _ := fce.ml.ListModels() + models, _ := loader.ListModels() if len(models) > 0 { modelInput = models[0] - log.Debug().Msgf("[FCE] No model specified, using first available: %s", modelInput) + log.Debug().Msgf("No model specified, using: %s", modelInput) } else { - log.Warn().Msgf("[FCE] No model specified, none available") - return "", fmt.Errorf("[fce] no model specified, none available") + log.Debug().Msgf("No model specified, returning error") + return "", fmt.Errorf("no model specified") } } // If a model is found in the bearer token, it takes precedence if bearerExists { - log.Debug().Msgf("[FCE] Using model from bearer token: %s", bearer) + log.Debug().Msgf("Using model from bearer token: %s", bearer) modelInput = bearer } - - if modelInput == "" { - log.Warn().Msg("[FCE] modelInput is empty") - } return modelInput, nil } - -// TODO: Do we still need the first return value?
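With the extractor struct removed, ModelFromContext becomes a free function and each endpoint passes the model loader explicitly. A sketch of the calling convention, taken from the TTS endpoints later in this commit; the fallback to the client-supplied name on error is deliberate:

    modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false)
    if err != nil {
        modelFile = input.Model // fall back to whatever the client sent
        log.Warn().Msgf("Model not found in context: %s", input.Model)
    }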
-func (fce *FiberContextExtractor) OpenAIRequestFromContext(c *fiber.Ctx, firstModel bool) (string, *schema.OpenAIRequest, error) { - input := new(schema.OpenAIRequest) - - // Get input data from the request body - if err := c.BodyParser(input); err != nil { - return "", nil, fmt.Errorf("failed parsing request body: %w", err) - } - - received, _ := json.Marshal(input) - - ctx, cancel := context.WithCancel(fce.appConfig.Context) - input.Context = ctx - input.Cancel = cancel - - log.Debug().Msgf("Request received: %s", string(received)) - - var err error - input.Model, err = fce.ModelFromContext(c, input.Model, firstModel) - - return input.Model, input, err -} diff --git a/core/http/endpoints/elevenlabs/tts.go b/core/http/endpoints/elevenlabs/tts.go index 4f5db463..841f9b5f 100644 --- a/core/http/endpoints/elevenlabs/tts.go +++ b/core/http/endpoints/elevenlabs/tts.go @@ -2,7 +2,9 @@ package elevenlabs import ( "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/core/schema" "github.com/gofiber/fiber/v2" @@ -15,7 +17,7 @@ import ( // @Param request body schema.TTSRequest true "query params" // @Success 200 {string} binary "Response" // @Router /v1/text-to-speech/{voice-id} [post] -func TTSEndpoint(fce *fiberContext.FiberContextExtractor, ttsbs *backend.TextToSpeechBackendService) func(c *fiber.Ctx) error { +func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { input := new(schema.ElevenLabsTTSRequest) @@ -26,21 +28,34 @@ func TTSEndpoint(fce *fiberContext.FiberContextExtractor, ttsbs *backend.TextToS return err } - var err error - input.ModelID, err = fce.ModelFromContext(c, input.ModelID, false) + modelFile, err := fiberContext.ModelFromContext(c, ml, input.ModelID, false) if err != nil { + modelFile = input.ModelID log.Warn().Msgf("Model not found in context: %s", input.ModelID) } - responseChannel := ttsbs.TextToAudioFile(&schema.TTSRequest{ - Model: input.ModelID, - Voice: voiceID, - Input: input.Text, - }) - rawValue := <-responseChannel - if rawValue.Error != nil { - return rawValue.Error + cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, + config.LoadOptionDebug(appConfig.Debug), + config.LoadOptionThreads(appConfig.Threads), + config.LoadOptionContextSize(appConfig.ContextSize), + config.LoadOptionF16(appConfig.F16), + ) + if err != nil { + modelFile = input.ModelID + log.Warn().Msgf("Model not found in context: %s", input.ModelID) + } else { + if input.ModelID != "" { + modelFile = input.ModelID + } else { + modelFile = cfg.Model + } } - return c.Download(*rawValue.Value) + log.Debug().Msgf("Request for model: %s", modelFile) + + filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, voiceID, ml, appConfig, *cfg) + if err != nil { + return err + } + return c.Download(filePath) } } diff --git a/core/http/endpoints/localai/backend_monitor.go b/core/http/endpoints/localai/backend_monitor.go index dac20388..8c7a664a 100644 --- a/core/http/endpoints/localai/backend_monitor.go +++ b/core/http/endpoints/localai/backend_monitor.go @@ -6,7 +6,7 @@ import ( "github.com/gofiber/fiber/v2" ) -func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error { +func BackendMonitorEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error { return 
func(c *fiber.Ctx) error { input := new(schema.BackendMonitorRequest) @@ -23,7 +23,7 @@ func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ct } } -func BackendShutdownEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error { +func BackendShutdownEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { input := new(schema.BackendMonitorRequest) // Get input data from the request body diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go index df7841fb..7822e024 100644 --- a/core/http/endpoints/localai/tts.go +++ b/core/http/endpoints/localai/tts.go @@ -2,7 +2,9 @@ package localai import ( "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/core/schema" "github.com/gofiber/fiber/v2" @@ -14,26 +16,45 @@ import ( // @Param request body schema.TTSRequest true "query params" // @Success 200 {string} binary "Response" // @Router /v1/audio/speech [post] -func TTSEndpoint(fce *fiberContext.FiberContextExtractor, ttsbs *backend.TextToSpeechBackendService) func(c *fiber.Ctx) error { +func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - var err error + input := new(schema.TTSRequest) // Get input data from the request body - if err = c.BodyParser(input); err != nil { + if err := c.BodyParser(input); err != nil { return err } - input.Model, err = fce.ModelFromContext(c, input.Model, false) + modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false) if err != nil { + modelFile = input.Model log.Warn().Msgf("Model not found in context: %s", input.Model) } - responseChannel := ttsbs.TextToAudioFile(input) - rawValue := <-responseChannel - if rawValue.Error != nil { - return rawValue.Error + cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, + config.LoadOptionDebug(appConfig.Debug), + config.LoadOptionThreads(appConfig.Threads), + config.LoadOptionContextSize(appConfig.ContextSize), + config.LoadOptionF16(appConfig.F16), + ) + + if err != nil { + modelFile = input.Model + log.Warn().Msgf("Model not found in context: %s", input.Model) + } else { + modelFile = cfg.Model } - return c.Download(*rawValue.Value) + + log.Debug().Msgf("Request for model: %s", modelFile) + + if input.Backend != "" { + cfg.Backend = input.Backend + } + + filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, input.Voice, ml, appConfig, *cfg) + if err != nil { + return err + } + return c.Download(filePath) } } diff --git a/core/http/endpoints/openai/assistant.go b/core/http/endpoints/openai/assistant.go index 72cb8b4a..dceb3789 100644 --- a/core/http/endpoints/openai/assistant.go +++ b/core/http/endpoints/openai/assistant.go @@ -339,7 +339,7 @@ func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model } } - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistantID %q", assistantID)) + return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistantID %q", assistantID)) } } diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index a240b024..36d1142b 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -5,11 +5,17 @@ import ( "bytes" "encoding/json" "fmt" + "strings"
+ "time" - fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/core/services" + "github.com/go-skynet/LocalAI/pkg/grammar" + model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/utils" "github.com/gofiber/fiber/v2" + "github.com/google/uuid" "github.com/rs/zerolog/log" "github.com/valyala/fasthttp" ) @@ -19,82 +25,412 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/chat/completions [post] -func ChatEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error { +func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error { + emptyMessage := "" + id := uuid.New().String() + created := int(time.Now().Unix()) + + process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { + initialMessage := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}}, + Object: "chat.completion.chunk", + } + responses <- initialMessage + + ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { + resp := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}}, + Object: "chat.completion.chunk", + Usage: schema.OpenAIUsage{ + PromptTokens: usage.Prompt, + CompletionTokens: usage.Completion, + TotalTokens: usage.Prompt + usage.Completion, + }, + } + + responses <- resp + return true + }) + close(responses) + } + processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { + result := "" + _, tokenUsage, _ := ComputeChoices(req, prompt, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { + result += s + // TODO: Change generated BNF grammar to be compliant with the schema so we can + // stream the result token by token here. + return true + }) + + results := parseFunctionCall(result, config.FunctionsConfig.ParallelCalls) + noActionToRun := len(results) > 0 && results[0].name == noAction + + switch { + case noActionToRun: + initialMessage := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}}, + Object: "chat.completion.chunk", + } + responses <- initialMessage + + result, err := handleQuestion(config, req, ml, startupOptions, results[0].arguments, prompt) + if err != nil { + log.Error().Err(err).Msg("error handling question") + return + } + + resp := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. 
+ Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}}, + Object: "chat.completion.chunk", + Usage: schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + }, + } + + responses <- resp + + default: + for i, ss := range results { + name, args := ss.name, ss.arguments + + initialMessage := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{ + Delta: &schema.Message{ + Role: "assistant", + ToolCalls: []schema.ToolCall{ + { + Index: i, + ID: id, + Type: "function", + FunctionCall: schema.FunctionCall{ + Name: name, + }, + }, + }, + }}}, + Object: "chat.completion.chunk", + } + responses <- initialMessage + + responses <- schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{ + Delta: &schema.Message{ + Role: "assistant", + ToolCalls: []schema.ToolCall{ + { + Index: i, + ID: id, + Type: "function", + FunctionCall: schema.FunctionCall{ + Arguments: args, + }, + }, + }, + }}}, + Object: "chat.completion.chunk", + } + } + } + + close(responses) + } + return func(c *fiber.Ctx) error { - _, request, err := fce.OpenAIRequestFromContext(c, false) + processFunctions := false + funcs := grammar.Functions{} + modelFile, input, err := readRequest(c, ml, startupOptions, true) if err != nil { - return fmt.Errorf("failed reading parameters from request: %w", err) + return fmt.Errorf("failed reading parameters from request:%w", err) } - traceID, finalResultChannel, _, tokenChannel, err := oais.Chat(request, false, request.Stream) + config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, startupOptions.Debug, startupOptions.Threads, startupOptions.ContextSize, startupOptions.F16) if err != nil { - return err + return fmt.Errorf("failed reading parameters from request:%w", err) + } + log.Debug().Msgf("Configuration read: %+v", config) + + // Allow the user to set custom actions via config file + // to be "embedded" in each model + noActionName := "answer" + noActionDescription := "use this action to answer without performing any action" + + if config.FunctionsConfig.NoActionFunctionName != "" { + noActionName = config.FunctionsConfig.NoActionFunctionName + } + if config.FunctionsConfig.NoActionDescriptionName != "" { + noActionDescription = config.FunctionsConfig.NoActionDescriptionName } - if request.Stream { + if input.ResponseFormat.Type == "json_object" { + input.Grammar = grammar.JSONBNF + } - log.Debug().Msgf("Chat Stream request received") + config.Grammar = input.Grammar + // process functions if we have any defined or if we have a function call string + if len(input.Functions) > 0 && config.ShouldUseFunctions() { + log.Debug().Msgf("Response needs to process functions") + + processFunctions = true + + noActionGrammar := grammar.Function{ + Name: noActionName, + Description: noActionDescription, + Parameters: map[string]interface{}{ + "properties": map[string]interface{}{ + "message": map[string]interface{}{ + "type": "string", + "description": "The message to reply the user with", + }}, + }, + } + + // Append the no action function + funcs = append(funcs, input.Functions...) 
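Condensed, the function-calling setup that continues below builds one grammar out of the user-supplied functions plus the built-in no-action fallback, then constrains generation with it. A recap sketch, not part of the patch itself:

    funcs = append(funcs, input.Functions...)
    if !config.FunctionsConfig.DisableNoAction {
        funcs = append(funcs, noActionGrammar) // lets the model answer without calling a tool
    }
    if config.FunctionToCall() != "" {
        funcs = funcs.Select(config.FunctionToCall()) // force one specific function
    }
    config.Grammar = funcs.ToJSONStructure().Grammar("", config.FunctionsConfig.ParallelCalls)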
+ if !config.FunctionsConfig.DisableNoAction { + funcs = append(funcs, noActionGrammar) + } + + // Force picking one of the functions by the request + if config.FunctionToCall() != "" { + funcs = funcs.Select(config.FunctionToCall()) + } + + // Update input grammar + jsStruct := funcs.ToJSONStructure() + config.Grammar = jsStruct.Grammar("", config.FunctionsConfig.ParallelCalls) + } else if input.JSONFunctionGrammarObject != nil { + config.Grammar = input.JSONFunctionGrammarObject.Grammar("", config.FunctionsConfig.ParallelCalls) + } + + // functions are not supported in stream mode (yet?) + toStream := input.Stream + + log.Debug().Msgf("Parameters: %+v", config) + + var predInput string + + // If we are using the tokenizer template, we don't need to process the messages + // unless we are processing functions + if !config.TemplateConfig.UseTokenizerTemplate || processFunctions { + + suppressConfigSystemPrompt := false + mess := []string{} + for messageIndex, i := range input.Messages { + var content string + role := i.Role + + // if function call, we might want to customize the role so we can display better that the "assistant called a json action" + // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request + if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" { + roleFn := "assistant_function_call" + r := config.Roles[roleFn] + if r != "" { + role = roleFn + } + } + r := config.Roles[role] + contentExists := i.Content != nil && i.StringContent != "" + + fcall := i.FunctionCall + if len(i.ToolCalls) > 0 { + fcall = i.ToolCalls + } + + // First attempt to populate content via a chat message specific template + if config.TemplateConfig.ChatMessage != "" { + chatMessageData := model.ChatMessageTemplateData{ + SystemPrompt: config.SystemPrompt, + Role: r, + RoleName: role, + Content: i.StringContent, + FunctionCall: fcall, + FunctionName: i.Name, + LastMessage: messageIndex == (len(input.Messages) - 1), + Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)), + MessageIndex: messageIndex, + } + templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData) + if err != nil { + log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping") + } else { + if templatedChatMessage == "" { + log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData) + continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf + } + log.Debug().Msgf("templated message for chat: %s", templatedChatMessage) + content = templatedChatMessage + } + } + + marshalAnyRole := func(f any) { + j, err := json.Marshal(f) + if err == nil { + if contentExists { + content += "\n" + fmt.Sprint(r, " ", string(j)) + } else { + content = fmt.Sprint(r, " ", string(j)) + } + } + } + marshalAny := func(f any) { + j, err := json.Marshal(f) + if err == nil { + if contentExists { + content += "\n" + string(j) + } else { + content = string(j) + } + } + } + // If this model doesn't have such a template, or if that template fails to return a value, template at the message level. 
+ if content == "" { + if r != "" { + if contentExists { + content = fmt.Sprint(r, i.StringContent) + } + + if i.FunctionCall != nil { + marshalAnyRole(i.FunctionCall) + } + if i.ToolCalls != nil { + marshalAnyRole(i.ToolCalls) + } + } else { + if contentExists { + content = fmt.Sprint(i.StringContent) + } + if i.FunctionCall != nil { + marshalAny(i.FunctionCall) + } + if i.ToolCalls != nil { + marshalAny(i.ToolCalls) + } + } + // Special Handling: System. We care if it was printed at all, not the r branch, so check seperately + if contentExists && role == "system" { + suppressConfigSystemPrompt = true + } + } + + mess = append(mess, content) + } + + predInput = strings.Join(mess, "\n") + log.Debug().Msgf("Prompt (before templating): %s", predInput) + + templateFile := "" + + // A model can have a "file.bin.tmpl" file associated with a prompt template prefix + if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { + templateFile = config.Model + } + + if config.TemplateConfig.Chat != "" && !processFunctions { + templateFile = config.TemplateConfig.Chat + } + + if config.TemplateConfig.Functions != "" && processFunctions { + templateFile = config.TemplateConfig.Functions + } + + if templateFile != "" { + templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{ + SystemPrompt: config.SystemPrompt, + SuppressSystemPrompt: suppressConfigSystemPrompt, + Input: predInput, + Functions: funcs, + }) + if err == nil { + predInput = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", predInput) + } else { + log.Debug().Msgf("Template failed loading: %s", err.Error()) + } + } + + log.Debug().Msgf("Prompt (after templating): %s", predInput) + if processFunctions { + log.Debug().Msgf("Grammar: %+v", config.Grammar) + } + } + + switch { + case toStream: + + log.Debug().Msgf("Stream request received") c.Context().SetContentType("text/event-stream") //c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8) - // + // c.Set("Content-Type", "text/event-stream") c.Set("Cache-Control", "no-cache") c.Set("Connection", "keep-alive") c.Set("Transfer-Encoding", "chunked") + responses := make(chan schema.OpenAIResponse) + + if !processFunctions { + go process(predInput, input, config, ml, responses) + } else { + go processTools(noActionName, predInput, input, config, ml, responses) + } + c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { usage := &schema.OpenAIUsage{} toolsCalled := false - for ev := range tokenChannel { - if ev.Error != nil { - log.Debug().Err(ev.Error).Msg("chat streaming responseChannel error") - request.Cancel() - break - } - usage = &ev.Value.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it - - if len(ev.Value.Choices[0].Delta.ToolCalls) > 0 { + for ev := range responses { + usage = &ev.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it + if len(ev.Choices[0].Delta.ToolCalls) > 0 { toolsCalled = true } var buf bytes.Buffer enc := json.NewEncoder(&buf) - if ev.Error != nil { - log.Debug().Err(ev.Error).Msg("[ChatEndpoint] error to debug during tokenChannel handler") - enc.Encode(ev.Error) - } else { - enc.Encode(ev.Value) - } - log.Debug().Msgf("chat streaming sending chunk: %s", buf.String()) + enc.Encode(ev) + log.Debug().Msgf("Sending chunk: %s", buf.String()) _, err := fmt.Fprintf(w, "data: %v\n", buf.String()) if err != nil { - log.Debug().Err(err).Msgf("Sending chunk failed") - 
request.Cancel() - break - } - err = w.Flush() - if err != nil { - log.Debug().Msg("error while flushing, closing connection") - request.Cancel() + log.Debug().Msgf("Sending chunk failed: %v", err) + input.Cancel() break } + w.Flush() } finishReason := "stop" if toolsCalled { finishReason = "tool_calls" - } else if toolsCalled && len(request.Tools) == 0 { + } else if toolsCalled && len(input.Tools) == 0 { finishReason = "function_call" } resp := &schema.OpenAIResponse{ - ID: traceID.ID, - Created: traceID.Created, - Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. + ID: id, + Created: created, + Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: []schema.Choice{ { FinishReason: finishReason, Index: 0, - Delta: &schema.Message{Content: ""}, + Delta: &schema.Message{Content: &emptyMessage}, }}, Object: "chat.completion.chunk", Usage: *usage, @@ -105,21 +441,202 @@ func ChatEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAI w.WriteString("data: [DONE]\n\n") w.Flush() })) - return nil + + // no streaming mode + default: + result, tokenUsage, err := ComputeChoices(input, predInput, config, startupOptions, ml, func(s string, c *[]schema.Choice) { + if !processFunctions { + // no function is called, just reply and use stop as finish reason + *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}}) + return + } + + results := parseFunctionCall(s, config.FunctionsConfig.ParallelCalls) + noActionsToRun := len(results) > 0 && results[0].name == noActionName + + switch { + case noActionsToRun: + result, err := handleQuestion(config, input, ml, startupOptions, results[0].arguments, predInput) + if err != nil { + log.Error().Err(err).Msg("error handling question") + return + } + *c = append(*c, schema.Choice{ + Message: &schema.Message{Role: "assistant", Content: &result}}) + default: + toolChoice := schema.Choice{ + Message: &schema.Message{ + Role: "assistant", + }, + } + + if len(input.Tools) > 0 { + toolChoice.FinishReason = "tool_calls" + } + + for _, ss := range results { + name, args := ss.name, ss.arguments + if len(input.Tools) > 0 { + // If we are using tools, we condense the function calls into + // a single response choice with all the tools + toolChoice.Message.ToolCalls = append(toolChoice.Message.ToolCalls, + schema.ToolCall{ + ID: id, + Type: "function", + FunctionCall: schema.FunctionCall{ + Name: name, + Arguments: args, + }, + }, + ) + } else { + // otherwise we return more choices directly + *c = append(*c, schema.Choice{ + FinishReason: "function_call", + Message: &schema.Message{ + Role: "assistant", + FunctionCall: map[string]interface{}{ + "name": name, + "arguments": args, + }, + }, + }) + } + } + + if len(input.Tools) > 0 { + // we need to append our result if we are using tools + *c = append(*c, toolChoice) + } + } + + }, nil) + if err != nil { + return err + } + + resp := &schema.OpenAIResponse{ + ID: id, + Created: created, + Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. 
+ Choices: result, + Object: "chat.completion", + Usage: schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + }, + } + respData, _ := json.Marshal(resp) + log.Debug().Msgf("Response: %s", respData) + + // Return the prediction in the response body + return c.JSON(resp) } - // TODO is this proper to have exclusive from Stream, or do we need to issue both responses? - rawResponse := <-finalResultChannel - - if rawResponse.Error != nil { - return rawResponse.Error - } - - jsonResult, _ := json.Marshal(rawResponse.Value) - log.Debug().Str("jsonResult", string(jsonResult)).Msg("Chat Final Response") - - // Return the prediction in the response body - return c.JSON(rawResponse.Value) } } + +func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, args, prompt string) (string, error) { + log.Debug().Msgf("nothing to do, computing a reply") + + // If there is a message that the LLM already sends as part of the JSON reply, use it + arguments := map[string]interface{}{} + json.Unmarshal([]byte(args), &arguments) + m, exists := arguments["message"] + if exists { + switch message := m.(type) { + case string: + if message != "" { + log.Debug().Msgf("Reply received from LLM: %s", message) + message = backend.Finetune(*config, prompt, message) + log.Debug().Msgf("Reply received from LLM(finetuned): %s", message) + + return message, nil + } + } + } + + log.Debug().Msgf("No action received from LLM, without a message, computing a reply") + // Otherwise ask the LLM to understand the JSON output and the context, and return a message + // Note: This costs (in terms of CPU/GPU) another computation + config.Grammar = "" + images := []string{} + for _, m := range input.Messages { + images = append(images, m.StringImages...) + } + + predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, ml, *config, o, nil) + if err != nil { + log.Error().Err(err).Msg("model inference failed") + return "", err + } + + prediction, err := predFunc() + if err != nil { + log.Error().Err(err).Msg("prediction failed") + return "", err + } + return backend.Finetune(*config, prompt, prediction.Response), nil +} + +type funcCallResults struct { + name string + arguments string +} + +func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults { + results := []funcCallResults{} + + // TODO: use generics to avoid this code duplication + if multipleResults { + ss := []map[string]interface{}{} + s := utils.EscapeNewLines(llmresult) + json.Unmarshal([]byte(s), &ss) + log.Debug().Msgf("Function return: %s %+v", s, ss) + + for _, s := range ss { + func_name, ok := s["function"] + if !ok { + continue + } + args, ok := s["arguments"] + if !ok { + continue + } + d, _ := json.Marshal(args) + funcName, ok := func_name.(string) + if !ok { + continue + } + results = append(results, funcCallResults{name: funcName, arguments: string(d)}) + } + } else { + // As we have to change the result before processing, we can't stream the answer token-by-token (yet?) 
+ ss := map[string]interface{}{} + // This prevents newlines from breaking JSON parsing for clients + s := utils.EscapeNewLines(llmresult) + json.Unmarshal([]byte(s), &ss) + log.Debug().Msgf("Function return: %s %+v", s, ss) + + // The grammar defines the function name as "function", while OpenAI returns "name" + func_name, ok := ss["function"] + if !ok { + return results + } + // Similarly, while here arguments is a map[string]interface{}, OpenAI actually wants a stringified object + args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix) + if !ok { + return results + } + d, _ := json.Marshal(args) + funcName, ok := func_name.(string) + if !ok { + return results + } + results = append(results, funcCallResults{name: funcName, arguments: string(d)}) + } + + return results +} diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index d8b412a9..69923475 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -4,13 +4,18 @@ import ( "bufio" "bytes" "encoding/json" + "errors" "fmt" + "time" - fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" - "github.com/go-skynet/LocalAI/core/services" + "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/grammar" + model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" + "github.com/google/uuid" "github.com/rs/zerolog/log" "github.com/valyala/fasthttp" ) @@ -20,50 +25,116 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/completions [post] -func CompletionEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error { +func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + id := uuid.New().String() + created := int(time.Now().Unix()) + + process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { + ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { + resp := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. 
+ Choices: []schema.Choice{ + { + Index: 0, + Text: s, + }, + }, + Object: "text_completion", + Usage: schema.OpenAIUsage{ + PromptTokens: usage.Prompt, + CompletionTokens: usage.Completion, + TotalTokens: usage.Prompt + usage.Completion, + }, + } + log.Debug().Msgf("Sending goroutine: %s", s) + + responses <- resp + return true + }) + close(responses) + } + return func(c *fiber.Ctx) error { - _, request, err := fce.OpenAIRequestFromContext(c, false) + modelFile, input, err := readRequest(c, ml, appConfig, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - log.Debug().Msgf("`OpenAIRequest`: %+v", request) + log.Debug().Msgf("`input`: %+v", input) - traceID, finalResultChannel, _, _, tokenChannel, err := oais.Completion(request, false, request.Stream) + config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16) if err != nil { - return err + return fmt.Errorf("failed reading parameters from request:%w", err) } - if request.Stream { - log.Debug().Msgf("Completion Stream request received") + if input.ResponseFormat.Type == "json_object" { + input.Grammar = grammar.JSONBNF + } + config.Grammar = input.Grammar + + log.Debug().Msgf("Parameter Config: %+v", config) + + if input.Stream { + log.Debug().Msgf("Stream request received") c.Context().SetContentType("text/event-stream") //c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8) //c.Set("Content-Type", "text/event-stream") c.Set("Cache-Control", "no-cache") c.Set("Connection", "keep-alive") c.Set("Transfer-Encoding", "chunked") + } + + templateFile := "" + + // A model can have a "file.bin.tmpl" file associated with a prompt template prefix + if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { + templateFile = config.Model + } + + if config.TemplateConfig.Completion != "" { + templateFile = config.TemplateConfig.Completion + } + + if input.Stream { + if len(config.PromptStrings) > 1 { + return errors.New("cannot handle more than 1 `PromptStrings` when Streaming") + } + + predInput := config.PromptStrings[0] + + if templateFile != "" { + templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ + Input: predInput, + }) + if err == nil { + predInput = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", predInput) + } + } + + responses := make(chan schema.OpenAIResponse) + + go process(predInput, input, config, ml, responses) c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { - for ev := range tokenChannel { + + for ev := range responses { var buf bytes.Buffer enc := json.NewEncoder(&buf) - if ev.Error != nil { - log.Debug().Msgf("[CompletionEndpoint] error to debug during tokenChannel handler: %q", ev.Error) - enc.Encode(ev.Error) - } else { - enc.Encode(ev.Value) - } + enc.Encode(ev) - log.Debug().Msgf("completion streaming sending chunk: %s", buf.String()) + log.Debug().Msgf("Sending chunk: %s", buf.String()) fmt.Fprintf(w, "data: %v\n", buf.String()) w.Flush() } resp := &schema.OpenAIResponse{ - ID: traceID.ID, - Created: traceID.Created, - Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. + ID: id, + Created: created, + Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. 
Choices: []schema.Choice{ { Index: 0, @@ -80,15 +151,55 @@ func CompletionEndpoint(fce *fiberContext.FiberContextExtractor, oais *services. })) return nil } - // TODO is this proper to have exclusive from Stream, or do we need to issue both responses? - rawResponse := <-finalResultChannel - if rawResponse.Error != nil { - return rawResponse.Error + + var result []schema.Choice + + totalTokenUsage := backend.TokenUsage{} + + for k, i := range config.PromptStrings { + if templateFile != "" { + // A model can have a "file.bin.tmpl" file associated with a prompt template prefix + templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ + SystemPrompt: config.SystemPrompt, + Input: i, + }) + if err == nil { + i = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", i) + } + } + + r, tokenUsage, err := ComputeChoices( + input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) { + *c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k}) + }, nil) + if err != nil { + return err + } + + totalTokenUsage.Prompt += tokenUsage.Prompt + totalTokenUsage.Completion += tokenUsage.Completion + + result = append(result, r...) } - jsonResult, _ := json.Marshal(rawResponse.Value) + + resp := &schema.OpenAIResponse{ + ID: id, + Created: created, + Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: result, + Object: "text_completion", + Usage: schema.OpenAIUsage{ + PromptTokens: totalTokenUsage.Prompt, + CompletionTokens: totalTokenUsage.Completion, + TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, + }, + } + + jsonResult, _ := json.Marshal(resp) log.Debug().Msgf("Response: %s", jsonResult) // Return the prediction in the response body - return c.JSON(rawResponse.Value) + return c.JSON(resp) } } diff --git a/core/http/endpoints/openai/edit.go b/core/http/endpoints/openai/edit.go index a33050dd..25497095 100644 --- a/core/http/endpoints/openai/edit.go +++ b/core/http/endpoints/openai/edit.go @@ -3,36 +3,92 @@ package openai import ( "encoding/json" "fmt" + "time" - fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" - "github.com/go-skynet/LocalAI/core/services" + "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" + model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" + "github.com/google/uuid" "github.com/rs/zerolog/log" ) -func EditEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error { +func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - _, request, err := fce.OpenAIRequestFromContext(c, false) + modelFile, input, err := readRequest(c, ml, appConfig, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - _, finalResultChannel, _, _, _, err := oais.Edit(request, false, request.Stream) + config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16) if err != nil { - return err + return fmt.Errorf("failed reading parameters from request:%w", err) } - rawResponse := <-finalResultChannel - if rawResponse.Error != nil { - return rawResponse.Error + log.Debug().Msgf("Parameter Config: %+v", config) + + templateFile := "" + + // A model can 
have a "file.bin.tmpl" file associated with a prompt template prefix + if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { + templateFile = config.Model } - jsonResult, _ := json.Marshal(rawResponse.Value) + if config.TemplateConfig.Edit != "" { + templateFile = config.TemplateConfig.Edit + } + + var result []schema.Choice + totalTokenUsage := backend.TokenUsage{} + + for _, i := range config.InputStrings { + if templateFile != "" { + templatedInput, err := ml.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{ + Input: i, + Instruction: input.Instruction, + SystemPrompt: config.SystemPrompt, + }) + if err == nil { + i = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", i) + } + } + + r, tokenUsage, err := ComputeChoices(input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) { + *c = append(*c, schema.Choice{Text: s}) + }, nil) + if err != nil { + return err + } + + totalTokenUsage.Prompt += tokenUsage.Prompt + totalTokenUsage.Completion += tokenUsage.Completion + + result = append(result, r...) + } + + id := uuid.New().String() + created := int(time.Now().Unix()) + resp := &schema.OpenAIResponse{ + ID: id, + Created: created, + Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: result, + Object: "edit", + Usage: schema.OpenAIUsage{ + PromptTokens: totalTokenUsage.Prompt, + CompletionTokens: totalTokenUsage.Completion, + TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, + }, + } + + jsonResult, _ := json.Marshal(resp) log.Debug().Msgf("Response: %s", jsonResult) // Return the prediction in the response body - return c.JSON(rawResponse.Value) + return c.JSON(resp) } } diff --git a/core/http/endpoints/openai/embeddings.go b/core/http/endpoints/openai/embeddings.go index be546991..eca34f79 100644 --- a/core/http/endpoints/openai/embeddings.go +++ b/core/http/endpoints/openai/embeddings.go @@ -3,9 +3,14 @@ package openai import ( "encoding/json" "fmt" + "time" "github.com/go-skynet/LocalAI/core/backend" - fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/pkg/model" + + "github.com/go-skynet/LocalAI/core/schema" + "github.com/google/uuid" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" @@ -16,25 +21,63 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/embeddings [post] -func EmbeddingsEndpoint(fce *fiberContext.FiberContextExtractor, ebs *backend.EmbeddingsBackendService) func(c *fiber.Ctx) error { +func EmbeddingsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - _, input, err := fce.OpenAIRequestFromContext(c, true) + model, input, err := readRequest(c, ml, appConfig, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - responseChannel := ebs.Embeddings(input) - - rawResponse := <-responseChannel - - if rawResponse.Error != nil { - return rawResponse.Error + config, input, err := mergeRequestWithConfig(model, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16) + if err != nil { + return fmt.Errorf("failed reading parameters from request:%w", err) } - jsonResult, _ := json.Marshal(rawResponse.Value) + log.Debug().Msgf("Parameter Config: %+v", config) + items := 
[]schema.Item{} + + for i, s := range config.InputToken { + // get the model function to call for the result + embedFn, err := backend.ModelEmbedding("", s, ml, *config, appConfig) + if err != nil { + return err + } + + embeddings, err := embedFn() + if err != nil { + return err + } + items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"}) + } + + for i, s := range config.InputStrings { + // get the model function to call for the result + embedFn, err := backend.ModelEmbedding(s, []int{}, ml, *config, appConfig) + if err != nil { + return err + } + + embeddings, err := embedFn() + if err != nil { + return err + } + items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"}) + } + + id := uuid.New().String() + created := int(time.Now().Unix()) + resp := &schema.OpenAIResponse{ + ID: id, + Created: created, + Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. + Data: items, + Object: "list", + } + + jsonResult, _ := json.Marshal(resp) log.Debug().Msgf("Response: %s", jsonResult) // Return the prediction in the response body - return c.JSON(rawResponse.Value) + return c.JSON(resp) } } diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go index ec3d84da..9e806b3e 100644 --- a/core/http/endpoints/openai/image.go +++ b/core/http/endpoints/openai/image.go @@ -1,18 +1,50 @@ package openai import ( + "bufio" + "encoding/base64" "encoding/json" "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strconv" + "strings" + "time" - fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/google/uuid" "github.com/go-skynet/LocalAI/core/backend" + model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" ) -// https://platform.openai.com/docs/api-reference/images/create +func downloadFile(url string) (string, error) { + // Get the data + resp, err := http.Get(url) + if err != nil { + return "", err + } + defer resp.Body.Close() + + // Create the file + out, err := os.CreateTemp("", "image") + if err != nil { + return "", err + } + defer out.Close() + + // Write the body to file + _, err = io.Copy(out, resp.Body) + return out.Name(), err +} + +// /* * @@ -27,36 +59,186 @@ import ( * */ - // ImageEndpoint is the OpenAI Image generation API endpoint https://platform.openai.com/docs/api-reference/images/create // @Summary Creates an image given a prompt. // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/images/generations [post] -func ImageEndpoint(fce *fiberContext.FiberContextExtractor, igbs *backend.ImageGenerationBackendService) func(c *fiber.Ctx) error { +func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - // TODO: Somewhat a hack. Is there a better place to assign this? 
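The downloadFile helper introduced above fetches a remote image into a temporary file; the endpoint below uses it when input.File carries a URL rather than base64 data. A usage sketch (the URL is illustrative):

    out, err := downloadFile("https://example.com/input.png")
    if err != nil {
        return fmt.Errorf("failed downloading file: %w", err)
    }
    defer os.RemoveAll(out)
    fileData, err := os.ReadFile(out) // bytes then feed the img2img source path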
- if igbs.BaseUrlForGeneratedImages == "" { - igbs.BaseUrlForGeneratedImages = c.BaseURL() + "/generated-images/" - } - _, request, err := fce.OpenAIRequestFromContext(c, false) + m, input, err := readRequest(c, ml, appConfig, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - responseChannel := igbs.GenerateImage(request) - rawResponse := <-responseChannel - - if rawResponse.Error != nil { - return rawResponse.Error + if m == "" { + m = model.StableDiffusionBackend } + log.Debug().Msgf("Loading model: %+v", m) - jsonResult, err := json.Marshal(rawResponse.Value) + config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, 0, 0, false) if err != nil { - return err + return fmt.Errorf("failed reading parameters from request:%w", err) } + + src := "" + if input.File != "" { + + fileData := []byte{} + // check if input.File is an URL, if so download it and save it + // to a temporary file + if strings.HasPrefix(input.File, "http://") || strings.HasPrefix(input.File, "https://") { + out, err := downloadFile(input.File) + if err != nil { + return fmt.Errorf("failed downloading file:%w", err) + } + defer os.RemoveAll(out) + + fileData, err = os.ReadFile(out) + if err != nil { + return fmt.Errorf("failed reading file:%w", err) + } + + } else { + // base 64 decode the file and write it somewhere + // that we will cleanup + fileData, err = base64.StdEncoding.DecodeString(input.File) + if err != nil { + return err + } + } + + // Create a temporary file + outputFile, err := os.CreateTemp(appConfig.ImageDir, "b64") + if err != nil { + return err + } + // write the base64 result + writer := bufio.NewWriter(outputFile) + _, err = writer.Write(fileData) + if err != nil { + outputFile.Close() + return err + } + outputFile.Close() + src = outputFile.Name() + defer os.RemoveAll(src) + } + + log.Debug().Msgf("Parameter Config: %+v", config) + + switch config.Backend { + case "stablediffusion": + config.Backend = model.StableDiffusionBackend + case "tinydream": + config.Backend = model.TinyDreamBackend + case "": + config.Backend = model.StableDiffusionBackend + } + + sizeParts := strings.Split(input.Size, "x") + if len(sizeParts) != 2 { + return fmt.Errorf("invalid value for 'size'") + } + width, err := strconv.Atoi(sizeParts[0]) + if err != nil { + return fmt.Errorf("invalid value for 'size'") + } + height, err := strconv.Atoi(sizeParts[1]) + if err != nil { + return fmt.Errorf("invalid value for 'size'") + } + + b64JSON := false + if input.ResponseFormat.Type == "b64_json" { + b64JSON = true + } + // src and clip_skip + var result []schema.Item + for _, i := range config.PromptStrings { + n := input.N + if input.N == 0 { + n = 1 + } + for j := 0; j < n; j++ { + prompts := strings.Split(i, "|") + positive_prompt := prompts[0] + negative_prompt := "" + if len(prompts) > 1 { + negative_prompt = prompts[1] + } + + mode := 0 + step := config.Step + if step == 0 { + step = 15 + } + + if input.Mode != 0 { + mode = input.Mode + } + + if input.Step != 0 { + step = input.Step + } + + tempDir := "" + if !b64JSON { + tempDir = appConfig.ImageDir + } + // Create a temporary file + outputFile, err := os.CreateTemp(tempDir, "b64") + if err != nil { + return err + } + outputFile.Close() + output := outputFile.Name() + ".png" + // Rename the temporary file + err = os.Rename(outputFile.Name(), output) + if err != nil { + return err + } + + baseURL := c.BaseURL() + + fn, err := backend.ImageGeneration(height, width, mode, step, *config.Seed, positive_prompt, 
negative_prompt, src, output, ml, *config, appConfig) + if err != nil { + return err + } + if err := fn(); err != nil { + return err + } + + item := &schema.Item{} + + if b64JSON { + defer os.RemoveAll(output) + data, err := os.ReadFile(output) + if err != nil { + return err + } + item.B64JSON = base64.StdEncoding.EncodeToString(data) + } else { + base := filepath.Base(output) + item.URL = baseURL + "/generated-images/" + base + } + + result = append(result, *item) + } + } + + id := uuid.New().String() + created := int(time.Now().Unix()) + resp := &schema.OpenAIResponse{ + ID: id, + Created: created, + Data: result, + } + + jsonResult, _ := json.Marshal(resp) log.Debug().Msgf("Response: %s", jsonResult) + // Return the prediction in the response body - return c.JSON(rawResponse.Value) + return c.JSON(resp) } } diff --git a/core/http/endpoints/openai/inference.go b/core/http/endpoints/openai/inference.go new file mode 100644 index 00000000..06e784b7 --- /dev/null +++ b/core/http/endpoints/openai/inference.go @@ -0,0 +1,55 @@ +package openai + +import ( + "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" + + "github.com/go-skynet/LocalAI/core/schema" + model "github.com/go-skynet/LocalAI/pkg/model" +) + +func ComputeChoices( + req *schema.OpenAIRequest, + predInput string, + config *config.BackendConfig, + o *config.ApplicationConfig, + loader *model.ModelLoader, + cb func(string, *[]schema.Choice), + tokenCallback func(string, backend.TokenUsage) bool) ([]schema.Choice, backend.TokenUsage, error) { + n := req.N // number of completions to return + result := []schema.Choice{} + + if n == 0 { + n = 1 + } + + images := []string{} + for _, m := range req.Messages { + images = append(images, m.StringImages...) + } + + // get the model function to call for the result + predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, loader, *config, o, tokenCallback) + if err != nil { + return result, backend.TokenUsage{}, err + } + + tokenUsage := backend.TokenUsage{} + + for i := 0; i < n; i++ { + prediction, err := predFunc() + if err != nil { + return result, backend.TokenUsage{}, err + } + + tokenUsage.Prompt += prediction.Usage.Prompt + tokenUsage.Completion += prediction.Usage.Completion + + finetunedResponse := backend.Finetune(*config, predInput, prediction.Response) + cb(finetunedResponse, &result) + + //result = append(result, Choice{Text: prediction}) + + } + return result, tokenUsage, err +} diff --git a/core/http/endpoints/openai/list.go b/core/http/endpoints/openai/list.go index 9bb2b2ca..04e611a2 100644 --- a/core/http/endpoints/openai/list.go +++ b/core/http/endpoints/openai/list.go @@ -1,21 +1,61 @@ package openai import ( + "regexp" + + "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/core/services" + model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" ) -func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error { +func ListModelsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error { return func(c *fiber.Ctx) error { - // If blank, no filter is applied. 
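
The ListModelsEndpoint hunk below inlines what used to live in ListModelsService: an optional `filter` query parameter is compiled into a predicate, and model names already referenced by a configuration are used to mask loose files. The predicate-building pattern in isolation, as a self-contained sketch (names are illustrative):

package main

import (
	"fmt"
	"regexp"
)

// buildFilter returns a predicate that matches everything when the
// pattern is empty, and compiles it to a regexp otherwise.
func buildFilter(pattern string) (func(string) bool, error) {
	if pattern == "" {
		return func(string) bool { return true }, nil
	}
	rxp, err := regexp.Compile(pattern)
	if err != nil {
		return nil, err
	}
	return rxp.MatchString, nil
}

func main() {
	filterFn, err := buildFilter("^llama")
	if err != nil {
		panic(err)
	}
	for _, name := range []string{"llama-2-7b", "mistral-7b"} {
		fmt.Println(name, filterFn(name))
	}
}
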
+ models, err := ml.ListModels() + if err != nil { + return err + } + var mm map[string]interface{} = map[string]interface{}{} + + dataModels := []schema.OpenAIModel{} + + var filterFn func(name string) bool filter := c.Query("filter") + + // If filter is not specified, do not filter the list by model name + if filter == "" { + filterFn = func(_ string) bool { return true } + } else { + // If filter _IS_ specified, we compile it to a regex which is used to create the filterFn + rxp, err := regexp.Compile(filter) + if err != nil { + return err + } + filterFn = func(name string) bool { + return rxp.MatchString(name) + } + } + // By default, exclude any loose files that are already referenced by a configuration file. excludeConfigured := c.QueryBool("excludeConfigured", true) - dataModels, err := lms.ListModels(filter, excludeConfigured) - if err != nil { - return err + // Start with the known configurations + for _, c := range cl.GetAllBackendConfigs() { + if excludeConfigured { + mm[c.Model] = nil + } + + if filterFn(c.Name) { + dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"}) + } + } + + // Then iterate through the loose files: + for _, m := range models { + // And only adds them if they shouldn't be skipped. + if _, exists := mm[m]; !exists && filterFn(m) { + dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"}) + } } return c.JSON(struct { diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go new file mode 100644 index 00000000..369fb0b8 --- /dev/null +++ b/core/http/endpoints/openai/request.go @@ -0,0 +1,285 @@ +package openai + +import ( + "context" + "encoding/base64" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + + "github.com/go-skynet/LocalAI/core/config" + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/grammar" + model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" + "github.com/rs/zerolog/log" +) + +func readRequest(c *fiber.Ctx, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) { + input := new(schema.OpenAIRequest) + + // Get input data from the request body + if err := c.BodyParser(input); err != nil { + return "", nil, fmt.Errorf("failed parsing request body: %w", err) + } + + received, _ := json.Marshal(input) + + ctx, cancel := context.WithCancel(o.Context) + input.Context = ctx + input.Cancel = cancel + + log.Debug().Msgf("Request received: %s", string(received)) + + modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, firstModel) + + return modelFile, input, err +} + +// this function check if the string is an URL, if it's an URL downloads the image in memory +// encodes it in base64 and returns the base64 string +func getBase64Image(s string) (string, error) { + if strings.HasPrefix(s, "http") { + // download the image + resp, err := http.Get(s) + if err != nil { + return "", err + } + defer resp.Body.Close() + + // read the image data into memory + data, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + + // encode the image data in base64 + encoded := base64.StdEncoding.EncodeToString(data) + + // return the base64 string + return encoded, nil + } + + // if the string instead is prefixed with "data:image/jpeg;base64,", drop it + if strings.HasPrefix(s, "data:image/jpeg;base64,") { + return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil + } + return 
"", fmt.Errorf("not valid string") +} + +func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest) { + if input.Echo { + config.Echo = input.Echo + } + if input.TopK != nil { + config.TopK = input.TopK + } + if input.TopP != nil { + config.TopP = input.TopP + } + + if input.Backend != "" { + config.Backend = input.Backend + } + + if input.ClipSkip != 0 { + config.Diffusers.ClipSkip = input.ClipSkip + } + + if input.ModelBaseName != "" { + config.AutoGPTQ.ModelBaseName = input.ModelBaseName + } + + if input.NegativePromptScale != 0 { + config.NegativePromptScale = input.NegativePromptScale + } + + if input.UseFastTokenizer { + config.UseFastTokenizer = input.UseFastTokenizer + } + + if input.NegativePrompt != "" { + config.NegativePrompt = input.NegativePrompt + } + + if input.RopeFreqBase != 0 { + config.RopeFreqBase = input.RopeFreqBase + } + + if input.RopeFreqScale != 0 { + config.RopeFreqScale = input.RopeFreqScale + } + + if input.Grammar != "" { + config.Grammar = input.Grammar + } + + if input.Temperature != nil { + config.Temperature = input.Temperature + } + + if input.Maxtokens != nil { + config.Maxtokens = input.Maxtokens + } + + switch stop := input.Stop.(type) { + case string: + if stop != "" { + config.StopWords = append(config.StopWords, stop) + } + case []interface{}: + for _, pp := range stop { + if s, ok := pp.(string); ok { + config.StopWords = append(config.StopWords, s) + } + } + } + + if len(input.Tools) > 0 { + for _, tool := range input.Tools { + input.Functions = append(input.Functions, tool.Function) + } + } + + if input.ToolsChoice != nil { + var toolChoice grammar.Tool + + switch content := input.ToolsChoice.(type) { + case string: + _ = json.Unmarshal([]byte(content), &toolChoice) + case map[string]interface{}: + dat, _ := json.Marshal(content) + _ = json.Unmarshal(dat, &toolChoice) + } + input.FunctionCall = map[string]interface{}{ + "name": toolChoice.Function.Name, + } + } + + // Decode each request's message content + index := 0 + for i, m := range input.Messages { + switch content := m.Content.(type) { + case string: + input.Messages[i].StringContent = content + case []interface{}: + dat, _ := json.Marshal(content) + c := []schema.Content{} + json.Unmarshal(dat, &c) + for _, pp := range c { + if pp.Type == "text" { + input.Messages[i].StringContent = pp.Text + } else if pp.Type == "image_url" { + // Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64: + base64, err := getBase64Image(pp.ImageURL.URL) + if err == nil { + input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff + // set a placeholder for each image + input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + input.Messages[i].StringContent + index++ + } else { + fmt.Print("Failed encoding image", err) + } + } + } + } + } + + if input.RepeatPenalty != 0 { + config.RepeatPenalty = input.RepeatPenalty + } + + if input.FrequencyPenalty != 0 { + config.FrequencyPenalty = input.FrequencyPenalty + } + + if input.PresencePenalty != 0 { + config.PresencePenalty = input.PresencePenalty + } + + if input.Keep != 0 { + config.Keep = input.Keep + } + + if input.Batch != 0 { + config.Batch = input.Batch + } + + if input.IgnoreEOS { + config.IgnoreEOS = input.IgnoreEOS + } + + if input.Seed != nil { + config.Seed = input.Seed + } + + if input.TypicalP != nil { + config.TypicalP = input.TypicalP + } + + switch inputs := input.Input.(type) { + case string: + if inputs != 
"" { + config.InputStrings = append(config.InputStrings, inputs) + } + case []interface{}: + for _, pp := range inputs { + switch i := pp.(type) { + case string: + config.InputStrings = append(config.InputStrings, i) + case []interface{}: + tokens := []int{} + for _, ii := range i { + tokens = append(tokens, int(ii.(float64))) + } + config.InputToken = append(config.InputToken, tokens) + } + } + } + + // Can be either a string or an object + switch fnc := input.FunctionCall.(type) { + case string: + if fnc != "" { + config.SetFunctionCallString(fnc) + } + case map[string]interface{}: + var name string + n, exists := fnc["name"] + if exists { + nn, e := n.(string) + if e { + name = nn + } + } + config.SetFunctionCallNameString(name) + } + + switch p := input.Prompt.(type) { + case string: + config.PromptStrings = append(config.PromptStrings, p) + case []interface{}: + for _, pp := range p { + if s, ok := pp.(string); ok { + config.PromptStrings = append(config.PromptStrings, s) + } + } + } +} + +func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) { + cfg, err := cm.LoadBackendConfigFileByName(modelFile, loader.ModelPath, + config.LoadOptionDebug(debug), + config.LoadOptionThreads(threads), + config.LoadOptionContextSize(ctx), + config.LoadOptionF16(f16), + ) + + // Set the parameters for the language model prediction + updateRequestConfig(cfg, input) + + return cfg, input, err +} diff --git a/core/http/endpoints/openai/transcription.go b/core/http/endpoints/openai/transcription.go index 572cec12..c7dd39e7 100644 --- a/core/http/endpoints/openai/transcription.go +++ b/core/http/endpoints/openai/transcription.go @@ -9,7 +9,8 @@ import ( "path/filepath" "github.com/go-skynet/LocalAI/core/backend" - fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/core/config" + model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" @@ -22,15 +23,17 @@ import ( // @Param file formData file true "file" // @Success 200 {object} map[string]string "Response" // @Router /v1/audio/transcriptions [post] -func TranscriptEndpoint(fce *fiberContext.FiberContextExtractor, tbs *backend.TranscriptionBackendService) func(c *fiber.Ctx) error { +func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - _, request, err := fce.OpenAIRequestFromContext(c, false) + m, input, err := readRequest(c, ml, appConfig, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - // TODO: Investigate this file copy stuff later - potentially belongs in service. 
-
+	config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
+	if err != nil {
+		return fmt.Errorf("failed reading parameters from request:%w", err)
+	}
 	// retrieve the file data from the request
 	file, err := c.FormFile("file")
 	if err != nil {
@@ -62,16 +65,13 @@ func TranscriptEndpoint(fce *fiberContext.FiberContextExtractor, tbs *backend.Tr
 	log.Debug().Msgf("Audio file copied to: %+v", dst)
 
-	request.File = dst
-
-	responseChannel := tbs.Transcribe(request)
-	rawResponse := <-responseChannel
-
-	if rawResponse.Error != nil {
-		return rawResponse.Error
+	tr, err := backend.ModelTranscription(dst, input.Language, ml, *config, appConfig)
+	if err != nil {
+		return err
 	}
-	log.Debug().Msgf("Transcribed: %+v", rawResponse.Value)
+
+	log.Debug().Msgf("Transcribed: %+v", tr)
 	// TODO: handle different outputs here
-	return c.Status(http.StatusOK).JSON(rawResponse.Value)
+	return c.Status(http.StatusOK).JSON(tr)
 	}
 }
diff --git a/core/schema/transcription.go b/core/schema/whisper.go
similarity index 90%
rename from core/schema/transcription.go
rename to core/schema/whisper.go
index fe1799fa..41413c1f 100644
--- a/core/schema/transcription.go
+++ b/core/schema/whisper.go
@@ -10,7 +10,7 @@ type Segment struct {
 	Tokens []int `json:"tokens"`
 }
 
-type TranscriptionResult struct {
+type Result struct {
 	Segments []Segment `json:"segments"`
 	Text     string    `json:"text"`
 }
diff --git a/core/services/backend_monitor.go b/core/services/backend_monitor.go
index a610432c..979a67a3 100644
--- a/core/services/backend_monitor.go
+++ b/core/services/backend_monitor.go
@@ -15,22 +15,22 @@ import (
 	gopsutil "github.com/shirou/gopsutil/v3/process"
 )
 
-type BackendMonitorService struct {
+type BackendMonitor struct {
 	configLoader *config.BackendConfigLoader
 	modelLoader  *model.ModelLoader
 	options      *config.ApplicationConfig // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name.
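
For the transcription endpoint rewritten above, the handler expects a multipart form carrying the audio in a `file` part, alongside the usual request fields parsed by readRequest. A client sketch using only the standard library; the URL and model name are assumptions for illustration:

package main

import (
	"bytes"
	"fmt"
	"io"
	"mime/multipart"
	"net/http"
	"os"
)

func main() {
	var buf bytes.Buffer
	w := multipart.NewWriter(&buf)

	// The handler reads the audio from the "file" form field.
	part, err := w.CreateFormFile("file", "audio.wav")
	if err != nil {
		panic(err)
	}
	f, err := os.Open("audio.wav") // path is illustrative
	if err != nil {
		panic(err)
	}
	defer f.Close()
	if _, err := io.Copy(part, f); err != nil {
		panic(err)
	}
	// Model selection, as with the other OpenAI-style endpoints.
	_ = w.WriteField("model", "whisper-1")
	w.Close()

	resp, err := http.Post("http://localhost:8080/v1/audio/transcriptions",
		w.FormDataContentType(), &buf)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body))
}
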
} -func NewBackendMonitorService(modelLoader *model.ModelLoader, configLoader *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *BackendMonitorService { - return &BackendMonitorService{ +func NewBackendMonitor(configLoader *config.BackendConfigLoader, modelLoader *model.ModelLoader, appConfig *config.ApplicationConfig) BackendMonitor { + return BackendMonitor{ configLoader: configLoader, modelLoader: modelLoader, options: appConfig, } } -func (bms BackendMonitorService) getModelLoaderIDFromModelName(modelName string) (string, error) { - config, exists := bms.configLoader.GetBackendConfig(modelName) +func (bm BackendMonitor) getModelLoaderIDFromModelName(modelName string) (string, error) { + config, exists := bm.configLoader.GetBackendConfig(modelName) var backendId string if exists { backendId = config.Model @@ -46,8 +46,8 @@ func (bms BackendMonitorService) getModelLoaderIDFromModelName(modelName string) return backendId, nil } -func (bms *BackendMonitorService) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) { - config, exists := bms.configLoader.GetBackendConfig(model) +func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) { + config, exists := bm.configLoader.GetBackendConfig(model) var backend string if exists { backend = config.Model @@ -60,7 +60,7 @@ func (bms *BackendMonitorService) SampleLocalBackendProcess(model string) (*sche backend = fmt.Sprintf("%s.bin", backend) } - pid, err := bms.modelLoader.GetGRPCPID(backend) + pid, err := bm.modelLoader.GetGRPCPID(backend) if err != nil { log.Error().Err(err).Str("model", model).Msg("failed to find GRPC pid") @@ -101,12 +101,12 @@ func (bms *BackendMonitorService) SampleLocalBackendProcess(model string) (*sche }, nil } -func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.StatusResponse, error) { - backendId, err := bms.getModelLoaderIDFromModelName(modelName) +func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse, error) { + backendId, err := bm.getModelLoaderIDFromModelName(modelName) if err != nil { return nil, err } - modelAddr := bms.modelLoader.CheckIsLoaded(backendId) + modelAddr := bm.modelLoader.CheckIsLoaded(backendId) if modelAddr == "" { return nil, fmt.Errorf("backend %s is not currently loaded", backendId) } @@ -114,7 +114,7 @@ func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.Status status, rpcErr := modelAddr.GRPC(false, nil).Status(context.TODO()) if rpcErr != nil { log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error()) - val, slbErr := bms.SampleLocalBackendProcess(backendId) + val, slbErr := bm.SampleLocalBackendProcess(backendId) if slbErr != nil { return nil, fmt.Errorf("backend %s experienced an error retrieving status info via rpc: %s, then failed local node process sample: %s", backendId, rpcErr.Error(), slbErr.Error()) } @@ -131,10 +131,10 @@ func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.Status return status, nil } -func (bms BackendMonitorService) ShutdownModel(modelName string) error { - backendId, err := bms.getModelLoaderIDFromModelName(modelName) +func (bm BackendMonitor) ShutdownModel(modelName string) error { + backendId, err := bm.getModelLoaderIDFromModelName(modelName) if err != nil { return err } - return bms.modelLoader.ShutdownModel(backendId) + return bm.modelLoader.ShutdownModel(backendId) } diff --git a/core/services/gallery.go 
b/core/services/gallery.go index 1ef8e3e2..b068abbb 100644 --- a/core/services/gallery.go +++ b/core/services/gallery.go @@ -3,18 +3,14 @@ package services import ( "context" "encoding/json" - "errors" "os" - "path/filepath" "strings" "sync" "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/embedded" - "github.com/go-skynet/LocalAI/pkg/downloader" "github.com/go-skynet/LocalAI/pkg/gallery" + "github.com/go-skynet/LocalAI/pkg/startup" "github.com/go-skynet/LocalAI/pkg/utils" - "github.com/rs/zerolog/log" "gopkg.in/yaml.v2" ) @@ -33,6 +29,18 @@ func NewGalleryService(modelPath string) *GalleryService { } } +func prepareModel(modelPath string, req gallery.GalleryModel, cl *config.BackendConfigLoader, downloadStatus func(string, string, string, float64)) error { + + config, err := gallery.GetGalleryConfigFromURL(req.URL) + if err != nil { + return err + } + + config.Files = append(config.Files, req.AdditionalFiles...) + + return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus) +} + func (g *GalleryService) UpdateStatus(s string, op *gallery.GalleryOpStatus) { g.Lock() defer g.Unlock() @@ -84,10 +92,10 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader err = gallery.InstallModelFromGalleryByName(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback) } } else if op.ConfigURL != "" { - PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL) + startup.PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL) err = cl.Preload(g.modelPath) } else { - err = prepareModel(g.modelPath, op.Req, progressCallback) + err = prepareModel(g.modelPath, op.Req, cl, progressCallback) } if err != nil { @@ -119,12 +127,13 @@ type galleryModel struct { ID string `json:"id"` } -func processRequests(modelPath string, galleries []gallery.Gallery, requests []galleryModel) error { +func processRequests(modelPath, s string, cm *config.BackendConfigLoader, galleries []gallery.Gallery, requests []galleryModel) error { var err error for _, r := range requests { utils.ResetDownloadTimers() if r.ID == "" { - err = prepareModel(modelPath, r.GalleryModel, utils.DisplayDownloadFunction) + err = prepareModel(modelPath, r.GalleryModel, cm, utils.DisplayDownloadFunction) + } else { if strings.Contains(r.ID, "@") { err = gallery.InstallModelFromGallery( @@ -149,7 +158,7 @@ func ApplyGalleryFromFile(modelPath, s string, cl *config.BackendConfigLoader, g return err } - return processRequests(modelPath, galleries, requests) + return processRequests(modelPath, s, cl, galleries, requests) } func ApplyGalleryFromString(modelPath, s string, cl *config.BackendConfigLoader, galleries []gallery.Gallery) error { @@ -159,90 +168,5 @@ func ApplyGalleryFromString(modelPath, s string, cl *config.BackendConfigLoader, return err } - return processRequests(modelPath, galleries, requests) -} - -// PreloadModelsConfigurations will preload models from the given list of URLs -// It will download the model if it is not already present in the model path -// It will also try to resolve if the model is an embedded model YAML configuration -func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, models ...string) { - for _, url := range models { - - // As a best effort, try to resolve the model from the remote library - // if it's not resolved we try with the other method below - if modelLibraryURL != "" { - lib, err := embedded.GetRemoteLibraryShorteners(modelLibraryURL) - if err == nil { - if lib[url] != "" { - 
log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url]) - url = lib[url] - } - } - } - - url = embedded.ModelShortURL(url) - switch { - case embedded.ExistsInModelsLibrary(url): - modelYAML, err := embedded.ResolveContent(url) - // If we resolve something, just save it to disk and continue - if err != nil { - log.Error().Err(err).Msg("error resolving model content") - continue - } - - log.Debug().Msgf("[startup] resolved embedded model: %s", url) - md5Name := utils.MD5(url) - modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml" - if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil { - log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition") - } - case downloader.LooksLikeURL(url): - log.Debug().Msgf("[startup] resolved model to download: %s", url) - - // md5 of model name - md5Name := utils.MD5(url) - - // check if file exists - if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) { - modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml" - err := downloader.DownloadFile(url, modelDefinitionFilePath, "", func(fileName, current, total string, percent float64) { - utils.DisplayDownloadFunction(fileName, current, total, percent) - }) - if err != nil { - log.Error().Err(err).Str("url", url).Str("filepath", modelDefinitionFilePath).Msg("error downloading model") - } - } - default: - if _, err := os.Stat(url); err == nil { - log.Debug().Msgf("[startup] resolved local model: %s", url) - // copy to modelPath - md5Name := utils.MD5(url) - - modelYAML, err := os.ReadFile(url) - if err != nil { - log.Error().Err(err).Str("filepath", url).Msg("error reading model definition") - continue - } - - modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml" - if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil { - log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error loading model: %s") - } - } else { - log.Warn().Msgf("[startup] failed resolving model '%s'", url) - } - } - } -} - -func prepareModel(modelPath string, req gallery.GalleryModel, downloadStatus func(string, string, string, float64)) error { - - config, err := gallery.GetGalleryConfigFromURL(req.URL) - if err != nil { - return err - } - - config.Files = append(config.Files, req.AdditionalFiles...) 
- - return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus) + return processRequests(modelPath, s, cl, galleries, requests) } diff --git a/core/services/list_models.go b/core/services/list_models.go deleted file mode 100644 index a21e6faf..00000000 --- a/core/services/list_models.go +++ /dev/null @@ -1,72 +0,0 @@ -package services - -import ( - "regexp" - - "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/model" -) - -type ListModelsService struct { - bcl *config.BackendConfigLoader - ml *model.ModelLoader - appConfig *config.ApplicationConfig -} - -func NewListModelsService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *ListModelsService { - return &ListModelsService{ - bcl: bcl, - ml: ml, - appConfig: appConfig, - } -} - -func (lms *ListModelsService) ListModels(filter string, excludeConfigured bool) ([]schema.OpenAIModel, error) { - - models, err := lms.ml.ListModels() - if err != nil { - return nil, err - } - - var mm map[string]interface{} = map[string]interface{}{} - - dataModels := []schema.OpenAIModel{} - - var filterFn func(name string) bool - - // If filter is not specified, do not filter the list by model name - if filter == "" { - filterFn = func(_ string) bool { return true } - } else { - // If filter _IS_ specified, we compile it to a regex which is used to create the filterFn - rxp, err := regexp.Compile(filter) - if err != nil { - return nil, err - } - filterFn = func(name string) bool { - return rxp.MatchString(name) - } - } - - // Start with the known configurations - for _, c := range lms.bcl.GetAllBackendConfigs() { - if excludeConfigured { - mm[c.Model] = nil - } - - if filterFn(c.Name) { - dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"}) - } - } - - // Then iterate through the loose files: - for _, m := range models { - // And only adds them if they shouldn't be skipped. - if _, exists := mm[m]; !exists && filterFn(m) { - dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"}) - } - } - - return dataModels, nil -} diff --git a/core/services/openai.go b/core/services/openai.go deleted file mode 100644 index 7a2679ad..00000000 --- a/core/services/openai.go +++ /dev/null @@ -1,808 +0,0 @@ -package services - -import ( - "encoding/json" - "errors" - "fmt" - "strings" - "sync" - "time" - - "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/concurrency" - "github.com/go-skynet/LocalAI/pkg/grammar" - "github.com/go-skynet/LocalAI/pkg/model" - "github.com/go-skynet/LocalAI/pkg/utils" - "github.com/google/uuid" - "github.com/imdario/mergo" - "github.com/rs/zerolog/log" -) - -type endpointGenerationConfigurationFn func(bc *config.BackendConfig, request *schema.OpenAIRequest) endpointConfiguration - -type endpointConfiguration struct { - SchemaObject string - TemplatePath string - TemplateData model.PromptTemplateData - ResultMappingFn func(resp *backend.LLMResponse, index int) schema.Choice - CompletionMappingFn func(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] - TokenMappingFn func(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] -} - -// TODO: This is used for completion and edit. I am pretty sure I forgot parts, but fix it later. 
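
The service being deleted below was built around a small ErrorOr[T] wrapper flowing through channels; simpleMapper, just after this, is one such adapter from LLM responses to OpenAI responses. The general shape of that pattern, reduced to a self-contained sketch (type and function names are illustrative, not the pkg/concurrency API):

package main

import (
	"errors"
	"fmt"
)

// ErrorOr carries either a value or an error through a channel.
type ErrorOr[T any] struct {
	Value T
	Error error
}

// mapChannel adapts a stream of one result type into another,
// propagating errors unchanged.
func mapChannel[A, B any](in <-chan ErrorOr[A], fn func(A) B) <-chan ErrorOr[B] {
	out := make(chan ErrorOr[B])
	go func() {
		defer close(out)
		for r := range in {
			if r.Error != nil {
				out <- ErrorOr[B]{Error: r.Error}
				continue
			}
			out <- ErrorOr[B]{Value: fn(r.Value)}
		}
	}()
	return out
}

func main() {
	in := make(chan ErrorOr[int], 2)
	in <- ErrorOr[int]{Value: 21}
	in <- ErrorOr[int]{Error: errors.New("boom")}
	close(in)
	for r := range mapChannel(in, func(i int) string { return fmt.Sprint(i * 2) }) {
		fmt.Printf("%+v\n", r)
	}
}
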
-func simpleMapper(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] { - if resp.Error != nil || resp.Value == nil { - return concurrency.ErrorOr[*schema.OpenAIResponse]{Error: resp.Error} - } - return concurrency.ErrorOr[*schema.OpenAIResponse]{ - Value: &schema.OpenAIResponse{ - Choices: []schema.Choice{ - { - Text: resp.Value.Response, - }, - }, - Usage: schema.OpenAIUsage{ - PromptTokens: resp.Value.Usage.Prompt, - CompletionTokens: resp.Value.Usage.Completion, - TotalTokens: resp.Value.Usage.Prompt + resp.Value.Usage.Completion, - }, - }, - } -} - -// TODO: Consider alternative names for this. -// The purpose of this struct is to hold a reference to the OpenAI request context information -// This keeps things simple within core/services/openai.go and allows consumers to "see" this information if they need it -type OpenAIRequestTraceID struct { - ID string - Created int -} - -// This type split out from core/backend/llm.go - I'm still not _totally_ sure about this, but it seems to make sense to keep the generic LLM code from the OpenAI specific higher level functionality -type OpenAIService struct { - bcl *config.BackendConfigLoader - ml *model.ModelLoader - appConfig *config.ApplicationConfig - llmbs *backend.LLMBackendService -} - -func NewOpenAIService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig, llmbs *backend.LLMBackendService) *OpenAIService { - return &OpenAIService{ - bcl: bcl, - ml: ml, - appConfig: appConfig, - llmbs: llmbs, - } -} - -// Keeping in place as a reminder to POTENTIALLY ADD MORE VALIDATION HERE??? -func (oais *OpenAIService) getConfig(request *schema.OpenAIRequest) (*config.BackendConfig, *schema.OpenAIRequest, error) { - return oais.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, oais.appConfig) -} - -// TODO: It would be a lot less messy to make a return struct that had references to each of these channels -// INTENTIONALLY not doing that quite yet - I believe we need to let the references to unused channels die for the GC to automatically collect -- can we manually free()? -// finalResultsChannel is the primary async return path: one result for the entire request. -// promptResultsChannels is DUBIOUS. It's expected to be raw fan-out used within the function itself, but I am exposing for testing? One bundle of LLMResponseBundle per PromptString? Gets all N completions for a single prompt. -// completionsChannel is a channel that emits one *LLMResponse per generated completion, be that different prompts or N. Seems the most useful other than "entire request" Request is available to attempt tracing??? -// tokensChannel is a channel that emits one *LLMResponse per generated token. Let's see what happens! 
-func (oais *OpenAIService) Completion(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool) ( - traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], promptResultsChannels []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle], - completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) { - - return oais.GenerateTextFromRequest(request, func(bc *config.BackendConfig, request *schema.OpenAIRequest) endpointConfiguration { - return endpointConfiguration{ - SchemaObject: "text_completion", - TemplatePath: bc.TemplateConfig.Completion, - TemplateData: model.PromptTemplateData{ - SystemPrompt: bc.SystemPrompt, - }, - ResultMappingFn: func(resp *backend.LLMResponse, promptIndex int) schema.Choice { - return schema.Choice{ - Index: promptIndex, - FinishReason: "stop", - Text: resp.Response, - } - }, - CompletionMappingFn: simpleMapper, - TokenMappingFn: simpleMapper, - } - }, notifyOnPromptResult, notifyOnToken, nil) -} - -func (oais *OpenAIService) Edit(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool) ( - traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], promptResultsChannels []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle], - completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) { - - return oais.GenerateTextFromRequest(request, func(bc *config.BackendConfig, request *schema.OpenAIRequest) endpointConfiguration { - - return endpointConfiguration{ - SchemaObject: "edit", - TemplatePath: bc.TemplateConfig.Edit, - TemplateData: model.PromptTemplateData{ - SystemPrompt: bc.SystemPrompt, - Instruction: request.Instruction, - }, - ResultMappingFn: func(resp *backend.LLMResponse, promptIndex int) schema.Choice { - return schema.Choice{ - Index: promptIndex, - FinishReason: "stop", - Text: resp.Response, - } - }, - CompletionMappingFn: simpleMapper, - TokenMappingFn: simpleMapper, - } - }, notifyOnPromptResult, notifyOnToken, nil) -} - -func (oais *OpenAIService) Chat(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool) ( - traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], - completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) { - - return oais.GenerateFromMultipleMessagesChatRequest(request, notifyOnPromptResult, notifyOnToken, nil) -} - -func (oais *OpenAIService) GenerateTextFromRequest(request *schema.OpenAIRequest, endpointConfigFn endpointGenerationConfigurationFn, notifyOnPromptResult bool, notifyOnToken bool, initialTraceID *OpenAIRequestTraceID) ( - traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], promptResultsChannels []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle], - completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) { - - if initialTraceID == nil { - traceID = &OpenAIRequestTraceID{ - ID: uuid.New().String(), - Created: int(time.Now().Unix()), - } - } else { - traceID = initialTraceID - } - - bc, request, err := oais.getConfig(request) - if err != nil { - log.Error().Err(err).Msgf("[oais::GenerateTextFromRequest] error getting 
configuration") - return - } - - if request.ResponseFormat.Type == "json_object" { - request.Grammar = grammar.JSONBNF - } - - bc.Grammar = request.Grammar - - if request.Stream && len(bc.PromptStrings) > 1 { - log.Warn().Msg("potentially cannot handle more than 1 `PromptStrings` when Streaming?") - } - - rawFinalResultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) - finalResultChannel = rawFinalResultChannel - promptResultsChannels = []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle]{} - var rawCompletionsChannel chan concurrency.ErrorOr[*schema.OpenAIResponse] - var rawTokenChannel chan concurrency.ErrorOr[*schema.OpenAIResponse] - if notifyOnPromptResult { - rawCompletionsChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) - } - if notifyOnToken { - rawTokenChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) - } - - promptResultsChannelLock := sync.Mutex{} - - endpointConfig := endpointConfigFn(bc, request) - - if len(endpointConfig.TemplatePath) == 0 { - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if oais.ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", bc.Model)) { - endpointConfig.TemplatePath = bc.Model - } else { - log.Warn().Msgf("failed to find any template for %+v", request) - } - } - - setupWG := sync.WaitGroup{} - var prompts []string - if lPS := len(bc.PromptStrings); lPS > 0 { - setupWG.Add(lPS) - prompts = bc.PromptStrings - } else { - setupWG.Add(len(bc.InputStrings)) - prompts = bc.InputStrings - } - - var setupError error = nil - - for pI, p := range prompts { - - go func(promptIndex int, prompt string) { - if endpointConfig.TemplatePath != "" { - promptTemplateData := model.PromptTemplateData{ - Input: prompt, - } - err := mergo.Merge(promptTemplateData, endpointConfig.TemplateData, mergo.WithOverride) - if err == nil { - templatedInput, err := oais.ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, endpointConfig.TemplatePath, promptTemplateData) - if err == nil { - prompt = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", prompt) - } - } - } - - log.Debug().Msgf("[OAIS GenerateTextFromRequest] Prompt: %q", prompt) - promptResultsChannel, completionChannels, tokenChannels, err := oais.llmbs.GenerateText(prompt, request, bc, - func(r *backend.LLMResponse) schema.Choice { - return endpointConfig.ResultMappingFn(r, promptIndex) - }, notifyOnPromptResult, notifyOnToken) - if err != nil { - log.Error().Msgf("Unable to generate text prompt: %q\nerr: %q", prompt, err) - promptResultsChannelLock.Lock() - setupError = errors.Join(setupError, err) - promptResultsChannelLock.Unlock() - setupWG.Done() - return - } - if notifyOnPromptResult { - concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(completionChannels, endpointConfig.CompletionMappingFn), rawCompletionsChannel, true) - } - if notifyOnToken { - concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(tokenChannels, endpointConfig.TokenMappingFn), rawTokenChannel, true) - } - promptResultsChannelLock.Lock() - promptResultsChannels = append(promptResultsChannels, promptResultsChannel) - promptResultsChannelLock.Unlock() - setupWG.Done() - }(pI, p) - - } - setupWG.Wait() - - // If any of the setup goroutines experienced an error, quit early here. 
- if setupError != nil { - go func() { - log.Error().Err(setupError).Msgf("[OAIS GenerateTextFromRequest] caught an error during setup") - rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: setupError} - close(rawFinalResultChannel) - }() - return - } - - initialResponse := &schema.OpenAIResponse{ - ID: traceID.ID, - Created: traceID.Created, - Model: request.Model, - Object: endpointConfig.SchemaObject, - Usage: schema.OpenAIUsage{}, - } - - // utils.SliceOfChannelsRawMerger[[]schema.Choice](promptResultsChannels, rawFinalResultChannel, func(results []schema.Choice) (*schema.OpenAIResponse, error) { - concurrency.SliceOfChannelsReducer( - promptResultsChannels, rawFinalResultChannel, - func(iv concurrency.ErrorOr[*backend.LLMResponseBundle], result concurrency.ErrorOr[*schema.OpenAIResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] { - - if iv.Error != nil { - result.Error = iv.Error - return result - } - result.Value.Usage.PromptTokens += iv.Value.Usage.Prompt - result.Value.Usage.CompletionTokens += iv.Value.Usage.Completion - result.Value.Usage.TotalTokens = result.Value.Usage.PromptTokens + result.Value.Usage.CompletionTokens - - result.Value.Choices = append(result.Value.Choices, iv.Value.Response...) - - return result - }, concurrency.ErrorOr[*schema.OpenAIResponse]{Value: initialResponse}, true) - - completionsChannel = rawCompletionsChannel - tokenChannel = rawTokenChannel - - return -} - -// TODO: For porting sanity, this is distinct from GenerateTextFromRequest and is _currently_ specific to Chat purposes -// this is not a final decision -- just a reality of moving a lot of parts at once -// / This has _become_ Chat which wasn't the goal... More cleanup in the future once it's stable? -func (oais *OpenAIService) GenerateFromMultipleMessagesChatRequest(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool, initialTraceID *OpenAIRequestTraceID) ( - traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], - completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) { - - if initialTraceID == nil { - traceID = &OpenAIRequestTraceID{ - ID: uuid.New().String(), - Created: int(time.Now().Unix()), - } - } else { - traceID = initialTraceID - } - - bc, request, err := oais.getConfig(request) - if err != nil { - return - } - - // Allow the user to set custom actions via config file - // to be "embedded" in each model - noActionName := "answer" - noActionDescription := "use this action to answer without performing any action" - - if bc.FunctionsConfig.NoActionFunctionName != "" { - noActionName = bc.FunctionsConfig.NoActionFunctionName - } - if bc.FunctionsConfig.NoActionDescriptionName != "" { - noActionDescription = bc.FunctionsConfig.NoActionDescriptionName - } - - if request.ResponseFormat.Type == "json_object" { - request.Grammar = grammar.JSONBNF - } - - bc.Grammar = request.Grammar - - processFunctions := false - funcs := grammar.Functions{} - // process functions if we have any defined or if we have a function call string - if len(request.Functions) > 0 && bc.ShouldUseFunctions() { - log.Debug().Msgf("Response needs to process functions") - - processFunctions = true - - noActionGrammar := grammar.Function{ - Name: noActionName, - Description: noActionDescription, - Parameters: map[string]interface{}{ - "properties": map[string]interface{}{ - "message": map[string]interface{}{ - "type": "string", - 
"description": "The message to reply the user with", - }}, - }, - } - - // Append the no action function - funcs = append(funcs, request.Functions...) - if !bc.FunctionsConfig.DisableNoAction { - funcs = append(funcs, noActionGrammar) - } - - // Force picking one of the functions by the request - if bc.FunctionToCall() != "" { - funcs = funcs.Select(bc.FunctionToCall()) - } - - // Update input grammar - jsStruct := funcs.ToJSONStructure() - bc.Grammar = jsStruct.Grammar("", bc.FunctionsConfig.ParallelCalls) - } else if request.JSONFunctionGrammarObject != nil { - bc.Grammar = request.JSONFunctionGrammarObject.Grammar("", bc.FunctionsConfig.ParallelCalls) - } - - if request.Stream && processFunctions { - log.Warn().Msg("Streaming + Functions is highly experimental in this version") - } - - var predInput string - - if !bc.TemplateConfig.UseTokenizerTemplate || processFunctions { - - suppressConfigSystemPrompt := false - mess := []string{} - for messageIndex, i := range request.Messages { - var content string - role := i.Role - - // if function call, we might want to customize the role so we can display better that the "assistant called a json action" - // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request - if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" { - roleFn := "assistant_function_call" - r := bc.Roles[roleFn] - if r != "" { - role = roleFn - } - } - r := bc.Roles[role] - contentExists := i.Content != nil && i.StringContent != "" - - fcall := i.FunctionCall - if len(i.ToolCalls) > 0 { - fcall = i.ToolCalls - } - - // First attempt to populate content via a chat message specific template - if bc.TemplateConfig.ChatMessage != "" { - chatMessageData := model.ChatMessageTemplateData{ - SystemPrompt: bc.SystemPrompt, - Role: r, - RoleName: role, - Content: i.StringContent, - FunctionCall: fcall, - FunctionName: i.Name, - LastMessage: messageIndex == (len(request.Messages) - 1), - Function: bc.Grammar != "" && (messageIndex == (len(request.Messages) - 1)), - MessageIndex: messageIndex, - } - templatedChatMessage, err := oais.ml.EvaluateTemplateForChatMessage(bc.TemplateConfig.ChatMessage, chatMessageData) - if err != nil { - log.Error().Msgf("error processing message %+v using template \"%s\": %v. Skipping!", chatMessageData, bc.TemplateConfig.ChatMessage, err) - } else { - if templatedChatMessage == "" { - log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", bc.TemplateConfig.ChatMessage, chatMessageData) - continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf - } - log.Debug().Msgf("templated message for chat: %s", templatedChatMessage) - content = templatedChatMessage - } - } - marshalAnyRole := func(f any) { - j, err := json.Marshal(f) - if err == nil { - if contentExists { - content += "\n" + fmt.Sprint(r, " ", string(j)) - } else { - content = fmt.Sprint(r, " ", string(j)) - } - } - } - marshalAny := func(f any) { - j, err := json.Marshal(f) - if err == nil { - if contentExists { - content += "\n" + string(j) - } else { - content = string(j) - } - } - } - // If this model doesn't have such a template, or if that template fails to return a value, template at the message level. 
- if content == "" { - if r != "" { - if contentExists { - content = fmt.Sprint(r, i.StringContent) - } - - if i.FunctionCall != nil { - marshalAnyRole(i.FunctionCall) - } - } else { - if contentExists { - content = fmt.Sprint(i.StringContent) - } - - if i.FunctionCall != nil { - marshalAny(i.FunctionCall) - } - - if i.ToolCalls != nil { - marshalAny(i.ToolCalls) - } - } - // Special Handling: System. We care if it was printed at all, not the r branch, so check seperately - if contentExists && role == "system" { - suppressConfigSystemPrompt = true - } - } - - mess = append(mess, content) - } - - predInput = strings.Join(mess, "\n") - - log.Debug().Msgf("Prompt (before templating): %s", predInput) - - templateFile := "" - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if oais.ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", bc.Model)) { - templateFile = bc.Model - } - - if bc.TemplateConfig.Chat != "" && !processFunctions { - templateFile = bc.TemplateConfig.Chat - } - - if bc.TemplateConfig.Functions != "" && processFunctions { - templateFile = bc.TemplateConfig.Functions - } - - if templateFile != "" { - templatedInput, err := oais.ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{ - SystemPrompt: bc.SystemPrompt, - SuppressSystemPrompt: suppressConfigSystemPrompt, - Input: predInput, - Functions: funcs, - }) - if err == nil { - predInput = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", predInput) - } else { - log.Debug().Msgf("Template failed loading: %s", err.Error()) - } - } - } - log.Debug().Msgf("Prompt (after templating): %s", predInput) - if processFunctions { - log.Debug().Msgf("Grammar: %+v", bc.Grammar) - } - - rawFinalResultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) - var rawCompletionsChannel chan concurrency.ErrorOr[*schema.OpenAIResponse] - var rawTokenChannel chan concurrency.ErrorOr[*schema.OpenAIResponse] - if notifyOnPromptResult { - rawCompletionsChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) - } - if notifyOnToken { - rawTokenChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) - } - - rawResultChannel, individualCompletionChannels, tokenChannels, err := oais.llmbs.GenerateText(predInput, request, bc, func(resp *backend.LLMResponse) schema.Choice { - return schema.Choice{ - Index: 0, // ??? - FinishReason: "stop", - Message: &schema.Message{ - Role: "assistant", - Content: resp.Response, - }, - } - }, notifyOnPromptResult, notifyOnToken) - - chatSimpleMappingFn := func(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] { - if resp.Error != nil || resp.Value == nil { - return concurrency.ErrorOr[*schema.OpenAIResponse]{Error: resp.Error} - } - return concurrency.ErrorOr[*schema.OpenAIResponse]{ - Value: &schema.OpenAIResponse{ - ID: traceID.ID, - Created: traceID.Created, - Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. 
- Choices: []schema.Choice{ - { - Delta: &schema.Message{ - Role: "assistant", - Content: resp.Value.Response, - }, - Index: 0, - }, - }, - Object: "chat.completion.chunk", - Usage: schema.OpenAIUsage{ - PromptTokens: resp.Value.Usage.Prompt, - CompletionTokens: resp.Value.Usage.Completion, - TotalTokens: resp.Value.Usage.Prompt + resp.Value.Usage.Completion, - }, - }, - } - } - - if notifyOnPromptResult { - concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(individualCompletionChannels, chatSimpleMappingFn), rawCompletionsChannel, true) - } - if notifyOnToken { - concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(tokenChannels, chatSimpleMappingFn), rawTokenChannel, true) - } - - go func() { - rawResult := <-rawResultChannel - if rawResult.Error != nil { - log.Warn().Msgf("OpenAIService::processTools GenerateText error [DEBUG THIS?] %q", rawResult.Error) - return - } - llmResponseChoices := rawResult.Value.Response - - if processFunctions && len(llmResponseChoices) > 1 { - log.Warn().Msgf("chat functions response with %d choices in response, debug this?", len(llmResponseChoices)) - log.Debug().Msgf("%+v", llmResponseChoices) - } - - for _, result := range rawResult.Value.Response { - // If no functions, just return the raw result. - if !processFunctions { - - resp := schema.OpenAIResponse{ - ID: traceID.ID, - Created: traceID.Created, - Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{result}, - Object: "chat.completion.chunk", - Usage: schema.OpenAIUsage{ - PromptTokens: rawResult.Value.Usage.Prompt, - CompletionTokens: rawResult.Value.Usage.Completion, - TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion, - }, - } - - rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &resp} - - continue - } - // At this point, things are function specific! - - // Oh no this can't be the right way to do this... but it works. Save us, mudler! - fString := fmt.Sprintf("%s", result.Message.Content) - results := parseFunctionCall(fString, bc.FunctionsConfig.ParallelCalls) - noActionToRun := (len(results) > 0 && results[0].name == noActionName) - - if noActionToRun { - log.Debug().Msg("-- noActionToRun branch --") - initialMessage := schema.OpenAIResponse{ - ID: traceID.ID, - Created: traceID.Created, - Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: ""}}}, - Object: "stop", - } - rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &initialMessage} - - result, err := oais.handleQuestion(bc, request, results[0].arguments, predInput) - if err != nil { - log.Error().Msgf("error handling question: %s", err.Error()) - return - } - - resp := schema.OpenAIResponse{ - ID: traceID.ID, - Created: traceID.Created, - Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. 
- Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}}, - Object: "chat.completion.chunk", - Usage: schema.OpenAIUsage{ - PromptTokens: rawResult.Value.Usage.Prompt, - CompletionTokens: rawResult.Value.Usage.Completion, - TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion, - }, - } - - rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &resp} - - } else { - log.Debug().Msgf("[GenerateFromMultipleMessagesChatRequest] fnResultsBranch: %+v", results) - for i, ss := range results { - name, args := ss.name, ss.arguments - - initialMessage := schema.OpenAIResponse{ - ID: traceID.ID, - Created: traceID.Created, - Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{ - FinishReason: "function_call", - Message: &schema.Message{ - Role: "assistant", - ToolCalls: []schema.ToolCall{ - { - Index: i, - ID: traceID.ID, - Type: "function", - FunctionCall: schema.FunctionCall{ - Name: name, - Arguments: args, - }, - }, - }, - }}}, - Object: "chat.completion.chunk", - } - rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &initialMessage} - } - } - } - - close(rawFinalResultChannel) - }() - - finalResultChannel = rawFinalResultChannel - completionsChannel = rawCompletionsChannel - tokenChannel = rawTokenChannel - return -} - -func (oais *OpenAIService) handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, args, prompt string) (string, error) { - log.Debug().Msgf("[handleQuestion called] nothing to do, computing a reply") - - // If there is a message that the LLM already sends as part of the JSON reply, use it - arguments := map[string]interface{}{} - json.Unmarshal([]byte(args), &arguments) - m, exists := arguments["message"] - if exists { - switch message := m.(type) { - case string: - if message != "" { - log.Debug().Msgf("Reply received from LLM: %s", message) - message = oais.llmbs.Finetune(*config, prompt, message) - log.Debug().Msgf("Reply received from LLM(finetuned): %s", message) - - return message, nil - } - } - } - - log.Debug().Msgf("No action received from LLM, without a message, computing a reply") - // Otherwise ask the LLM to understand the JSON output and the context, and return a message - // Note: This costs (in term of CPU/GPU) another computation - config.Grammar = "" - images := []string{} - for _, m := range input.Messages { - images = append(images, m.StringImages...) 
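
handleQuestion above covers the case where the model picked the no-action function; parseFunctionCall, a bit further down, is the other half: it escapes newlines and unmarshals the grammar-constrained output into {function, arguments} pairs, re-stringifying the arguments because OpenAI clients expect a string. A reduced sketch of that extraction for the single-result case:

package main

import (
	"encoding/json"
	"fmt"
)

// extractCall pulls the function name and a re-stringified arguments
// object out of a grammar-constrained LLM reply such as
// {"function":"search","arguments":{"query":"weather"}}.
func extractCall(llmResult string) (name, args string, err error) {
	var parsed struct {
		Function  string          `json:"function"`
		Arguments json.RawMessage `json:"arguments"`
	}
	if err = json.Unmarshal([]byte(llmResult), &parsed); err != nil {
		return "", "", err
	}
	// OpenAI clients expect arguments as a string, not an object.
	return parsed.Function, string(parsed.Arguments), nil
}

func main() {
	name, args, err := extractCall(`{"function":"search","arguments":{"query":"weather in Rome"}}`)
	if err != nil {
		panic(err)
	}
	fmt.Println(name, args)
}
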
- } - - resultChannel, _, err := oais.llmbs.Inference(input.Context, &backend.LLMRequest{ - Text: prompt, - Images: images, - RawMessages: input.Messages, // Experimental - }, config, false) - - if err != nil { - log.Error().Msgf("inference setup error: %s", err.Error()) - return "", err - } - - raw := <-resultChannel - if raw.Error != nil { - log.Error().Msgf("inference error: %q", raw.Error.Error()) - return "", err - } - if raw.Value == nil { - log.Warn().Msgf("nil inference response") - return "", nil - } - return oais.llmbs.Finetune(*config, prompt, raw.Value.Response), nil -} - -type funcCallResults struct { - name string - arguments string -} - -func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults { - - results := []funcCallResults{} - - // TODO: use generics to avoid this code duplication - if multipleResults { - ss := []map[string]interface{}{} - s := utils.EscapeNewLines(llmresult) - json.Unmarshal([]byte(s), &ss) - - for _, s := range ss { - func_name, ok := s["function"] - if !ok { - continue - } - args, ok := s["arguments"] - if !ok { - continue - } - d, _ := json.Marshal(args) - funcName, ok := func_name.(string) - if !ok { - continue - } - results = append(results, funcCallResults{name: funcName, arguments: string(d)}) - } - } else { - // As we have to change the result before processing, we can't stream the answer token-by-token (yet?) - ss := map[string]interface{}{} - // This prevent newlines to break JSON parsing for clients - s := utils.EscapeNewLines(llmresult) - if err := json.Unmarshal([]byte(s), &ss); err != nil { - log.Error().Msgf("error unmarshalling JSON: %s", err.Error()) - return results - } - - // The grammar defines the function name as "function", while OpenAI returns "name" - func_name, ok := ss["function"] - if !ok { - log.Debug().Msgf("ss[function] is not OK!, llm result: %q", llmresult) - return results - } - // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object - args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix) - if !ok { - log.Debug().Msg("ss[arguments] is not OK!") - return results - } - d, _ := json.Marshal(args) - funcName, ok := func_name.(string) - if !ok { - log.Debug().Msgf("unexpected func_name: %+v", func_name) - return results - } - results = append(results, funcCallResults{name: funcName, arguments: string(d)}) - } - return results -} diff --git a/core/startup/startup.go b/core/startup/startup.go index 92ccaa9d..6298f034 100644 --- a/core/startup/startup.go +++ b/core/startup/startup.go @@ -4,21 +4,17 @@ import ( "fmt" "os" - "github.com/go-skynet/LocalAI/core" - "github.com/go-skynet/LocalAI/core/backend" "github.com/go-skynet/LocalAI/core/config" - openaiendpoint "github.com/go-skynet/LocalAI/core/http/endpoints/openai" // TODO: This is dubious. Fix this when splitting assistant api up. 
"github.com/go-skynet/LocalAI/core/services" "github.com/go-skynet/LocalAI/internal" "github.com/go-skynet/LocalAI/pkg/assets" "github.com/go-skynet/LocalAI/pkg/model" - "github.com/go-skynet/LocalAI/pkg/utils" + pkgStartup "github.com/go-skynet/LocalAI/pkg/startup" "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) -// (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) { -func Startup(opts ...config.AppOption) (*core.Application, error) { +func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) { options := config.NewApplicationConfig(opts...) zerolog.SetGlobalLevel(zerolog.InfoLevel) @@ -31,75 +27,68 @@ func Startup(opts ...config.AppOption) (*core.Application, error) { // Make sure directories exists if options.ModelPath == "" { - return nil, fmt.Errorf("options.ModelPath cannot be empty") + return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty") } err := os.MkdirAll(options.ModelPath, 0755) if err != nil { - return nil, fmt.Errorf("unable to create ModelPath: %q", err) + return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err) } if options.ImageDir != "" { err := os.MkdirAll(options.ImageDir, 0755) if err != nil { - return nil, fmt.Errorf("unable to create ImageDir: %q", err) + return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err) } } if options.AudioDir != "" { err := os.MkdirAll(options.AudioDir, 0755) if err != nil { - return nil, fmt.Errorf("unable to create AudioDir: %q", err) + return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err) } } if options.UploadDir != "" { err := os.MkdirAll(options.UploadDir, 0755) if err != nil { - return nil, fmt.Errorf("unable to create UploadDir: %q", err) - } - } - if options.ConfigsDir != "" { - err := os.MkdirAll(options.ConfigsDir, 0755) - if err != nil { - return nil, fmt.Errorf("unable to create ConfigsDir: %q", err) + return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err) } } - // Load config jsons - utils.LoadConfig(options.UploadDir, openaiendpoint.UploadedFilesFile, &openaiendpoint.UploadedFiles) - utils.LoadConfig(options.ConfigsDir, openaiendpoint.AssistantsConfigFile, &openaiendpoint.Assistants) - utils.LoadConfig(options.ConfigsDir, openaiendpoint.AssistantsFileConfigFile, &openaiendpoint.AssistantFiles) + // + pkgStartup.PreloadModelsConfigurations(options.ModelLibraryURL, options.ModelPath, options.ModelsURL...) - app := createApplication(options) + cl := config.NewBackendConfigLoader() + ml := model.NewModelLoader(options.ModelPath) - services.PreloadModelsConfigurations(options.ModelLibraryURL, options.ModelPath, options.ModelsURL...) 
+ configLoaderOpts := options.ToConfigLoaderOptions() - if err := app.BackendConfigLoader.LoadBackendConfigsFromPath(options.ModelPath, app.ApplicationConfig.ToConfigLoaderOptions()...); err != nil { + if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil { log.Error().Err(err).Msg("error loading config files") } if options.ConfigFile != "" { - if err := app.BackendConfigLoader.LoadBackendConfigFile(options.ConfigFile, app.ApplicationConfig.ToConfigLoaderOptions()...); err != nil { + if err := cl.LoadBackendConfigFile(options.ConfigFile, configLoaderOpts...); err != nil { log.Error().Err(err).Msg("error loading config file") } } - if err := app.BackendConfigLoader.Preload(options.ModelPath); err != nil { + if err := cl.Preload(options.ModelPath); err != nil { log.Error().Err(err).Msg("error downloading models") } if options.PreloadJSONModels != "" { - if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, app.BackendConfigLoader, options.Galleries); err != nil { - return nil, err + if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, cl, options.Galleries); err != nil { + return nil, nil, nil, err } } if options.PreloadModelsFromPath != "" { - if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, app.BackendConfigLoader, options.Galleries); err != nil { - return nil, err + if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, cl, options.Galleries); err != nil { + return nil, nil, nil, err } } if options.Debug { - for _, v := range app.BackendConfigLoader.ListBackendConfigs() { - cfg, _ := app.BackendConfigLoader.GetBackendConfig(v) + for _, v := range cl.ListBackendConfigs() { + cfg, _ := cl.GetBackendConfig(v) log.Debug().Msgf("Model: %s (config: %+v)", v, cfg) } } @@ -117,17 +106,17 @@ func Startup(opts ...config.AppOption) (*core.Application, error) { go func() { <-options.Context.Done() log.Debug().Msgf("Context canceled, shutting down") - app.ModelLoader.StopAllGRPC() + ml.StopAllGRPC() }() if options.WatchDog { wd := model.NewWatchDog( - app.ModelLoader, + ml, options.WatchDogBusyTimeout, options.WatchDogIdleTimeout, options.WatchDogBusy, options.WatchDogIdle) - app.ModelLoader.SetWatchDog(wd) + ml.SetWatchDog(wd) go wd.Run() go func() { <-options.Context.Done() @@ -137,35 +126,5 @@ func Startup(opts ...config.AppOption) (*core.Application, error) { } log.Info().Msg("core/startup process completed!") - return app, nil -} - -// In Lieu of a proper DI framework, this function wires up the Application manually. -// This is in core/startup rather than core/state.go to keep package references clean! 
-func createApplication(appConfig *config.ApplicationConfig) *core.Application { - app := &core.Application{ - ApplicationConfig: appConfig, - BackendConfigLoader: config.NewBackendConfigLoader(), - ModelLoader: model.NewModelLoader(appConfig.ModelPath), - } - - var err error - - app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - - app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - app.GalleryService = services.NewGalleryService(app.ApplicationConfig.ModelPath) - app.ListModelsService = services.NewListModelsService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService) - - app.LocalAIMetricsService, err = services.NewLocalAIMetricsService() - if err != nil { - log.Warn().Msg("Unable to initialize LocalAIMetricsService - non-fatal, optional service") - } - - return app + return cl, ml, options, nil } diff --git a/core/state.go b/core/state.go deleted file mode 100644 index cf0d614b..00000000 --- a/core/state.go +++ /dev/null @@ -1,41 +0,0 @@ -package core - -import ( - "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/services" - "github.com/go-skynet/LocalAI/pkg/model" -) - -// TODO: Can I come up with a better name or location for this? -// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy -// Perhaps a proper DI system is worth it in the future, but for now keep things simple. -type Application struct { - - // Application-Level Config - ApplicationConfig *config.ApplicationConfig - // ApplicationState *ApplicationState - - // Core Low-Level Services - BackendConfigLoader *config.BackendConfigLoader - ModelLoader *model.ModelLoader - - // Backend Services - EmbeddingsBackendService *backend.EmbeddingsBackendService - ImageGenerationBackendService *backend.ImageGenerationBackendService - LLMBackendService *backend.LLMBackendService - TranscriptionBackendService *backend.TranscriptionBackendService - TextToSpeechBackendService *backend.TextToSpeechBackendService - - // LocalAI System Services - BackendMonitorService *services.BackendMonitorService - GalleryService *services.GalleryService - ListModelsService *services.ListModelsService - LocalAIMetricsService *services.LocalAIMetricsService - OpenAIService *services.OpenAIService -} - -// TODO [NEXT PR?]: Break up ApplicationConfig. 
-// Migrate over stuff that is not set via config at all - especially runtime stuff -type ApplicationState struct { -} diff --git a/examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru b/examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru deleted file mode 100644 index c33bafe1..00000000 --- a/examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru +++ /dev/null @@ -1,25 +0,0 @@ -meta { - name: -completions Stream - type: http - seq: 4 -} - -post { - url: {{PROTOCOL}}{{HOST}}:{{PORT}}/completions - body: json - auth: none -} - -headers { - Content-Type: application/json -} - -body:json { - { - "model": "{{DEFAULT_MODEL}}", - "prompt": "function downloadFile(string url, string outputPath) {", - "max_tokens": 256, - "temperature": 0.5, - "stream": true - } -} diff --git a/pkg/concurrency/concurrency.go b/pkg/concurrency/concurrency.go deleted file mode 100644 index 324e8cc5..00000000 --- a/pkg/concurrency/concurrency.go +++ /dev/null @@ -1,135 +0,0 @@ -package concurrency - -import ( - "sync" -) - -// TODO: closeWhenDone bool parameter :: -// It currently is experimental, and therefore exists. -// Is there ever a situation to use false? - -// This function is used to merge the results of a slice of channels of a specific result type down to a single result channel of a second type. -// mappingFn allows the caller to convert from the input type to the output type -// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use. -// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes. -func SliceOfChannelsRawMerger[IndividualResultType any, OutputResultType any](individualResultChannels []<-chan IndividualResultType, outputChannel chan<- OutputResultType, mappingFn func(IndividualResultType) (OutputResultType, error), closeWhenDone bool) *sync.WaitGroup { - var wg sync.WaitGroup - wg.Add(len(individualResultChannels)) - mergingFn := func(c <-chan IndividualResultType) { - for r := range c { - mr, err := mappingFn(r) - if err == nil { - outputChannel <- mr - } - } - wg.Done() - } - for _, irc := range individualResultChannels { - go mergingFn(irc) - } - if closeWhenDone { - go func() { - wg.Wait() - close(outputChannel) - }() - } - - return &wg -} - -// This function is used to merge the results of a slice of channels of a specific result type down to a single result channel of THE SAME TYPE. -// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use. -// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes. -func SliceOfChannelsRawMergerWithoutMapping[ResultType any](individualResultsChannels []<-chan ResultType, outputChannel chan<- ResultType, closeWhenDone bool) *sync.WaitGroup { - return SliceOfChannelsRawMerger(individualResultsChannels, outputChannel, func(v ResultType) (ResultType, error) { return v, nil }, closeWhenDone) -} - -// This function is used to merge the results of a slice of channels of a specific result type down to a single succcess result channel of a second type, and an error channel -// mappingFn allows the caller to convert from the input type to the output type -// This variant is designed to be aware of concurrency.ErrorOr[T], splitting successes from failures. 
-// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use. -// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes. -func SliceOfChannelsMergerWithErrors[IndividualResultType any, OutputResultType any](individualResultChannels []<-chan ErrorOr[IndividualResultType], successChannel chan<- OutputResultType, errorChannel chan<- error, mappingFn func(IndividualResultType) (OutputResultType, error), closeWhenDone bool) *sync.WaitGroup { - var wg sync.WaitGroup - wg.Add(len(individualResultChannels)) - mergingFn := func(c <-chan ErrorOr[IndividualResultType]) { - for r := range c { - if r.Error != nil { - errorChannel <- r.Error - } else { - mv, err := mappingFn(r.Value) - if err != nil { - errorChannel <- err - } else { - successChannel <- mv - } - } - } - wg.Done() - } - for _, irc := range individualResultChannels { - go mergingFn(irc) - } - if closeWhenDone { - go func() { - wg.Wait() - close(successChannel) - close(errorChannel) - }() - } - return &wg -} - -// This function is used to reduce down the results of a slice of channels of a specific result type down to a single result value of a second type. -// reducerFn allows the caller to convert from the input type to the output type -// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use. -// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes. -func SliceOfChannelsReducer[InputResultType any, OutputResultType any](individualResultsChannels []<-chan InputResultType, outputChannel chan<- OutputResultType, - reducerFn func(iv InputResultType, ov OutputResultType) OutputResultType, initialValue OutputResultType, closeWhenDone bool) (wg *sync.WaitGroup) { - wg = &sync.WaitGroup{} - wg.Add(len(individualResultsChannels)) - reduceLock := sync.Mutex{} - reducingFn := func(c <-chan InputResultType) { - for iv := range c { - reduceLock.Lock() - initialValue = reducerFn(iv, initialValue) - reduceLock.Unlock() - } - wg.Done() - } - for _, irc := range individualResultsChannels { - go reducingFn(irc) - } - go func() { - wg.Wait() - outputChannel <- initialValue - if closeWhenDone { - close(outputChannel) - } - }() - return wg -} - -// This function is primarily designed to be used in combination with the above utility functions. -// A slice of input result channels of a specific type is provided, along with a function to map those values to another type -// A slice of output result channels is returned, where each value is mapped as it comes in. -// The order of the slice will be retained. 
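// Editor's note — an illustrative usage sketch for the transformer/merger pair
// documented above; it is not part of the original file, and the channel slice
// `in` below is hypothetical:
//
//	in := []<-chan int{c1, c2} // hypothetical input channels
//	strs := SliceOfChannelsTransformer(in, func(v int) string {
//		return fmt.Sprintf("$%d", v)
//	})
//	out := make(chan string)
//	// fan the transformed channels back into one, closing `out` when done
//	SliceOfChannelsRawMergerWithoutMapping(strs, out, true)
//	for s := range out {
//		fmt.Println(s)
//	}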
-func SliceOfChannelsTransformer[InputResultType any, OutputResultType any](inputChanels []<-chan InputResultType, mappingFn func(v InputResultType) OutputResultType) (outputChannels []<-chan OutputResultType) { - rawOutputChannels := make([]<-chan OutputResultType, len(inputChanels)) - - transformingFn := func(ic <-chan InputResultType, oc chan OutputResultType) { - for iv := range ic { - oc <- mappingFn(iv) - } - close(oc) - } - - for ci, c := range inputChanels { - roc := make(chan OutputResultType) - go transformingFn(c, roc) - rawOutputChannels[ci] = roc - } - - outputChannels = rawOutputChannels - return -} diff --git a/pkg/concurrency/concurrency_test.go b/pkg/concurrency/concurrency_test.go deleted file mode 100644 index fedd74be..00000000 --- a/pkg/concurrency/concurrency_test.go +++ /dev/null @@ -1,101 +0,0 @@ -package concurrency_test - -// TODO: noramlly, these go in utils_tests, right? Why does this cause problems only in pkg/utils? - -import ( - "fmt" - "slices" - - . "github.com/go-skynet/LocalAI/pkg/concurrency" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -var _ = Describe("utils/concurrency tests", func() { - It("SliceOfChannelsReducer works", func() { - individualResultsChannels := []<-chan int{} - initialValue := 0 - for i := 0; i < 3; i++ { - c := make(chan int) - go func(i int, c chan int) { - for ii := 1; ii < 4; ii++ { - c <- (i * ii) - } - close(c) - }(i, c) - individualResultsChannels = append(individualResultsChannels, c) - } - Expect(len(individualResultsChannels)).To(Equal(3)) - finalResultChannel := make(chan int) - wg := SliceOfChannelsReducer[int, int](individualResultsChannels, finalResultChannel, func(input int, val int) int { - return val + input - }, initialValue, true) - - Expect(wg).ToNot(BeNil()) - - result := <-finalResultChannel - - Expect(result).ToNot(Equal(0)) - Expect(result).To(Equal(18)) - }) - - It("SliceOfChannelsRawMergerWithoutMapping works", func() { - individualResultsChannels := []<-chan int{} - for i := 0; i < 3; i++ { - c := make(chan int) - go func(i int, c chan int) { - for ii := 1; ii < 4; ii++ { - c <- (i * ii) - } - close(c) - }(i, c) - individualResultsChannels = append(individualResultsChannels, c) - } - Expect(len(individualResultsChannels)).To(Equal(3)) - outputChannel := make(chan int) - wg := SliceOfChannelsRawMergerWithoutMapping(individualResultsChannels, outputChannel, true) - Expect(wg).ToNot(BeNil()) - outputSlice := []int{} - for v := range outputChannel { - outputSlice = append(outputSlice, v) - } - Expect(len(outputSlice)).To(Equal(9)) - slices.Sort(outputSlice) - Expect(outputSlice[0]).To(BeZero()) - Expect(outputSlice[3]).To(Equal(1)) - Expect(outputSlice[8]).To(Equal(6)) - }) - - It("SliceOfChannelsTransformer works", func() { - individualResultsChannels := []<-chan int{} - for i := 0; i < 3; i++ { - c := make(chan int) - go func(i int, c chan int) { - for ii := 1; ii < 4; ii++ { - c <- (i * ii) - } - close(c) - }(i, c) - individualResultsChannels = append(individualResultsChannels, c) - } - Expect(len(individualResultsChannels)).To(Equal(3)) - mappingFn := func(i int) string { - return fmt.Sprintf("$%d", i) - } - - outputChannels := SliceOfChannelsTransformer(individualResultsChannels, mappingFn) - Expect(len(outputChannels)).To(Equal(3)) - rSlice := []string{} - for ii := 1; ii < 4; ii++ { - for i := 0; i < 3; i++ { - res := <-outputChannels[i] - rSlice = append(rSlice, res) - } - } - slices.Sort(rSlice) - Expect(rSlice[0]).To(Equal("$0")) - Expect(rSlice[3]).To(Equal("$1")) - 
Expect(rSlice[8]).To(Equal("$6")) - }) -}) diff --git a/pkg/concurrency/types.go b/pkg/concurrency/types.go deleted file mode 100644 index 76081ba3..00000000 --- a/pkg/concurrency/types.go +++ /dev/null @@ -1,6 +0,0 @@ -package concurrency - -type ErrorOr[T any] struct { - Value T - Error error -} diff --git a/pkg/grpc/backend.go b/pkg/grpc/backend.go index 49a6b1bd..8fb8c39d 100644 --- a/pkg/grpc/backend.go +++ b/pkg/grpc/backend.go @@ -41,7 +41,7 @@ type Backend interface { PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) - AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) + AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error) Status(ctx context.Context) (*pb.StatusResponse, error) diff --git a/pkg/grpc/base/base.go b/pkg/grpc/base/base.go index c0b4bc34..0af5d94f 100644 --- a/pkg/grpc/base/base.go +++ b/pkg/grpc/base/base.go @@ -53,8 +53,8 @@ func (llm *Base) GenerateImage(*pb.GenerateImageRequest) error { return fmt.Errorf("unimplemented") } -func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error) { - return schema.TranscriptionResult{}, fmt.Errorf("unimplemented") +func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.Result, error) { + return schema.Result{}, fmt.Errorf("unimplemented") } func (llm *Base) TTS(*pb.TTSRequest) error { diff --git a/pkg/grpc/client.go b/pkg/grpc/client.go index 0e0e56c7..882db12a 100644 --- a/pkg/grpc/client.go +++ b/pkg/grpc/client.go @@ -210,7 +210,7 @@ func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOp return client.TTS(ctx, in, opts...) } -func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) { +func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) { if !c.parallel { c.opMutex.Lock() defer c.opMutex.Unlock() @@ -231,7 +231,7 @@ func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptReques if err != nil { return nil, err } - tresult := &schema.TranscriptionResult{} + tresult := &schema.Result{} for _, s := range res.Segments { tks := []int{} for _, t := range s.Tokens { diff --git a/pkg/grpc/embed.go b/pkg/grpc/embed.go index b4ba4884..73b185a3 100644 --- a/pkg/grpc/embed.go +++ b/pkg/grpc/embed.go @@ -53,12 +53,12 @@ func (e *embedBackend) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc. 
return e.s.TTS(ctx, in) } -func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) { +func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) { r, err := e.s.AudioTranscription(ctx, in) if err != nil { return nil, err } - tr := &schema.TranscriptionResult{} + tr := &schema.Result{} for _, s := range r.Segments { var tks []int for _, t := range s.Tokens { diff --git a/pkg/grpc/interface.go b/pkg/grpc/interface.go index aa7a3fbc..4d06544d 100644 --- a/pkg/grpc/interface.go +++ b/pkg/grpc/interface.go @@ -15,7 +15,7 @@ type LLM interface { Load(*pb.ModelOptions) error Embeddings(*pb.PredictOptions) ([]float32, error) GenerateImage(*pb.GenerateImageRequest) error - AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error) + AudioTranscription(*pb.TranscriptRequest) (schema.Result, error) TTS(*pb.TTSRequest) error TokenizeString(*pb.PredictOptions) (pb.TokenizationResponse, error) Status() (pb.StatusResponse, error) diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 617d8f62..5d9808a4 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -81,7 +81,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string if _, err := os.Stat(uri); err == nil { serverAddress, err := getFreeAddress() if err != nil { - return "", fmt.Errorf("%s failed allocating free ports: %s", backend, err.Error()) + return "", fmt.Errorf("failed allocating free ports: %s", err.Error()) } // Make sure the process is executable if err := ml.startProcess(uri, o.model, serverAddress); err != nil { @@ -134,7 +134,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string if !ready { log.Debug().Msgf("GRPC Service NOT ready") - return "", fmt.Errorf("%s grpc service not ready", backend) + return "", fmt.Errorf("grpc service not ready") } options := *o.gRPCOptions @@ -145,10 +145,10 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string res, err := client.GRPC(o.parallelRequests, ml.wd).LoadModel(o.context, &options) if err != nil { - return "", fmt.Errorf("\"%s\" could not load model: %w", backend, err) + return "", fmt.Errorf("could not load model: %w", err) } if !res.Success { - return "", fmt.Errorf("\"%s\" could not load model (no success): %s", backend, res.Message) + return "", fmt.Errorf("could not load model (no success): %s", res.Message) } return client, nil diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go new file mode 100644 index 00000000..b09516a7 --- /dev/null +++ b/pkg/startup/model_preload.go @@ -0,0 +1,85 @@ +package startup + +import ( + "errors" + "os" + "path/filepath" + + "github.com/go-skynet/LocalAI/embedded" + "github.com/go-skynet/LocalAI/pkg/downloader" + "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/rs/zerolog/log" +) + +// PreloadModelsConfigurations will preload models from the given list of URLs +// It will download the model if it is not already present in the model path +// It will also try to resolve if the model is an embedded model YAML configuration +func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, models ...string) { + for _, url := range models { + + // As a best effort, try to resolve the model from the remote library + // if it's not resolved we try with the other method below + if modelLibraryURL != "" { + lib, err := 
embedded.GetRemoteLibraryShorteners(modelLibraryURL)
+			if err == nil {
+				if lib[url] != "" {
+					log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url])
+					url = lib[url]
+				}
+			}
+		}
+
+		url = embedded.ModelShortURL(url)
+		switch {
+		case embedded.ExistsInModelsLibrary(url):
+			modelYAML, err := embedded.ResolveContent(url)
+			// If we resolve something, just save it to disk and continue
+			if err != nil {
+				log.Error().Err(err).Msg("error resolving model content")
+				continue
+			}
+
+			log.Debug().Msgf("[startup] resolved embedded model: %s", url)
+			md5Name := utils.MD5(url)
+			modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+			if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
+				log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
+			}
+		case downloader.LooksLikeURL(url):
+			log.Debug().Msgf("[startup] resolved model to download: %s", url)
+
+			// md5 of model name
+			md5Name := utils.MD5(url)
+
+			// check if file exists
+			if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
+				modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+				err := downloader.DownloadFile(url, modelDefinitionFilePath, "", func(fileName, current, total string, percent float64) {
+					utils.DisplayDownloadFunction(fileName, current, total, percent)
+				})
+				if err != nil {
+					log.Error().Err(err).Str("url", url).Str("filepath", modelDefinitionFilePath).Msg("error downloading model")
+				}
+			}
+		default:
+			if _, err := os.Stat(url); err == nil {
+				log.Debug().Msgf("[startup] resolved local model: %s", url)
+				// copy to modelPath
+				md5Name := utils.MD5(url)
+
+				modelYAML, err := os.ReadFile(url)
+				if err != nil {
+					log.Error().Err(err).Str("filepath", url).Msg("error reading model definition")
+					continue
+				}
+
+				modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+				if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
+					log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
+				}
+			} else {
+				log.Warn().Msgf("[startup] failed resolving model '%s'", url)
+			}
+		}
+	}
+}
diff --git a/core/services/model_preload_test.go b/pkg/startup/model_preload_test.go
similarity index 96%
rename from core/services/model_preload_test.go
rename to pkg/startup/model_preload_test.go
index fc65d565..63a8f8b0 100644
--- a/core/services/model_preload_test.go
+++ b/pkg/startup/model_preload_test.go
@@ -1,14 +1,13 @@
-package services_test
+package startup_test

 import (
 	"fmt"
 	"os"
 	"path/filepath"

+	. "github.com/go-skynet/LocalAI/pkg/startup"
 	"github.com/go-skynet/LocalAI/pkg/utils"

-	. "github.com/go-skynet/LocalAI/core/services"
-
 	. "github.com/onsi/ginkgo/v2"
 	.
"github.com/onsi/gomega" ) diff --git a/pkg/utils/base64.go b/pkg/utils/base64.go deleted file mode 100644 index 769d8a88..00000000 --- a/pkg/utils/base64.go +++ /dev/null @@ -1,50 +0,0 @@ -package utils - -import ( - "encoding/base64" - "fmt" - "io" - "net/http" - "strings" - "time" -) - -var base64DownloadClient http.Client = http.Client{ - Timeout: 30 * time.Second, -} - -// this function check if the string is an URL, if it's an URL downloads the image in memory -// encodes it in base64 and returns the base64 string - -// This may look weird down in pkg/utils while it is currently only used in core/config -// -// but I believe it may be useful for MQTT as well in the near future, so I'm -// extracting it while I'm thinking of it. -func GetImageURLAsBase64(s string) (string, error) { - if strings.HasPrefix(s, "http") { - // download the image - resp, err := base64DownloadClient.Get(s) - if err != nil { - return "", err - } - defer resp.Body.Close() - - // read the image data into memory - data, err := io.ReadAll(resp.Body) - if err != nil { - return "", err - } - - // encode the image data in base64 - encoded := base64.StdEncoding.EncodeToString(data) - - // return the base64 string - return encoded, nil - } - - // if the string instead is prefixed with "data:image/jpeg;base64,", drop it - if strings.HasPrefix(s, "data:image/jpeg;base64,") { - return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil - } - return "", fmt.Errorf("not valid string") -} From e9f090257c57181ffd411052e6b818ff6f09550f Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Wed, 17 Apr 2024 20:59:05 -0500 Subject: [PATCH 0321/2895] fix: adjust some sources names to match the naming of their repositories (#2061) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- Makefile | 60 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/Makefile b/Makefile index 6715e91e..d236f860 100644 --- a/Makefile +++ b/Makefile @@ -179,20 +179,20 @@ endif all: help ## BERT embeddings -sources/go-bert: - git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert - cd sources/go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1 +sources/go-bert.cpp: + git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert.cpp + cd sources/go-bert.cpp && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1 -sources/go-bert/libgobert.a: sources/go-bert - $(MAKE) -C sources/go-bert libgobert.a +sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp + $(MAKE) -C sources/go-bert.cpp libgobert.a -## go-llama-ggml -sources/go-llama-ggml: - git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml - cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1 +## go-llama.cpp +sources/go-llama.cpp: + git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama.cpp + cd sources/go-llama.cpp && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1 -sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml - $(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a +sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp + $(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) 
libbinding.a ## go-piper sources/go-piper: @@ -211,12 +211,12 @@ sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a ## RWKV -sources/go-rwkv: - git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv - cd sources/go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1 +sources/go-rwkv.cpp: + git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv.cpp + cd sources/go-rwkv.cpp && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1 -sources/go-rwkv/librwkv.a: sources/go-rwkv - cd sources/go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. +sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp + cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. ## stable diffusion sources/go-stable-diffusion: @@ -236,23 +236,24 @@ sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream ## whisper sources/whisper.cpp: - git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp + git clone https://github.com/ggerganov/whisper.cpp sources/whisper.cpp cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp cd sources/whisper.cpp && make libwhisper.a -get-sources: sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream +get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream replace: - $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv + $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go - $(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert + $(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp $(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream $(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper $(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion $(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang + $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp dropreplace: $(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp @@ -271,12 +272,12 @@ prepare-sources: get-sources replace ## GENERIC rebuild: ## Rebuilds the project $(GOCMD) clean -cache - $(MAKE) -C sources/go-llama-ggml clean + $(MAKE) -C sources/go-llama.cpp clean $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean - $(MAKE) -C sources/go-rwkv clean + $(MAKE) -C sources/go-rwkv.cpp clean $(MAKE) -C sources/whisper.cpp clean $(MAKE) -C sources/go-stable-diffusion clean - $(MAKE) -C sources/go-bert clean + $(MAKE) -C sources/go-bert.cpp clean $(MAKE) -C sources/go-piper clean $(MAKE) -C sources/go-tiny-dream clean $(MAKE) build @@ 
-598,8 +599,8 @@ backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/ backend-assets/grpc: protogen-go replace mkdir -p backend-assets/grpc -backend-assets/grpc/bert-embeddings: sources/go-bert sources/go-bert/libgobert.a backend-assets/grpc - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert LIBRARY_PATH=$(CURDIR)/sources/go-bert \ +backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc + CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/ backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc @@ -641,17 +642,16 @@ ifeq ($(BUILD_TYPE),metal) cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/ endif -backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libbinding.a backend-assets/grpc - $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \ +backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc + CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/ backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/ -backend-assets/grpc/rwkv: sources/go-rwkv sources/go-rwkv/librwkv.a backend-assets/grpc - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \ +backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc + CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc From 502c1eedaa61ae742bfd6eb2e074e6f1180c2c66 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Wed, 17 Apr 2024 22:21:55 -0500 Subject: [PATCH 0322/2895] feat: refactor the dynamic json configs for api_keys and external_backends (#2055) * feat: refactor the dynamic json configs for api_keys and external_backends Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: remove commented code Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --------- Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- core/cli/run.go | 13 +-- core/config/application_config.go | 7 ++ core/startup/config_file_watcher.go | 154 +++++++++++++++++++--------- core/startup/startup.go | 5 + 4 files changed, 117 
insertions(+), 62 deletions(-) diff --git a/core/cli/run.go b/core/cli/run.go index 0f3ba2de..d729f946 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -2,7 +2,6 @@ package cli import ( "fmt" - "os" "strings" "time" @@ -65,6 +64,7 @@ func (r *RunCMD) Run(ctx *Context) error { config.WithAudioDir(r.AudioPath), config.WithUploadDir(r.UploadPath), config.WithConfigsDir(r.ConfigPath), + config.WithDynamicConfigDir(r.LocalaiConfigDir), config.WithF16(r.F16), config.WithStringGalleries(r.Galleries), config.WithModelLibraryURL(r.RemoteLibrary), @@ -134,17 +134,6 @@ func (r *RunCMD) Run(ctx *Context) error { return fmt.Errorf("failed basic startup tasks with error %s", err.Error()) } - // Watch the configuration directory - // If the directory does not exist, we don't watch it - if _, err := os.Stat(r.LocalaiConfigDir); err == nil { - closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, options) - defer closeConfigWatcherFn() - - if err != nil { - return fmt.Errorf("failed while watching configuration directory %s", r.LocalaiConfigDir) - } - } - appHTTP, err := http.App(cl, ml, options) if err != nil { log.Error().Err(err).Msg("error during HTTP App construction") diff --git a/core/config/application_config.go b/core/config/application_config.go index 9525553a..77817616 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -22,6 +22,7 @@ type ApplicationConfig struct { AudioDir string UploadDir string ConfigsDir string + DynamicConfigsDir string CORS bool PreloadJSONModels string PreloadModelsFromPath string @@ -264,6 +265,12 @@ func WithConfigsDir(configsDir string) AppOption { } } +func WithDynamicConfigDir(dynamicConfigsDir string) AppOption { + return func(o *ApplicationConfig) { + o.DynamicConfigsDir = dynamicConfigsDir + } +} + func WithApiKeys(apiKeys []string) AppOption { return func(o *ApplicationConfig) { o.ApiKeys = apiKeys diff --git a/core/startup/config_file_watcher.go b/core/startup/config_file_watcher.go index 9c758e25..5d213df5 100644 --- a/core/startup/config_file_watcher.go +++ b/core/startup/config_file_watcher.go @@ -12,89 +12,143 @@ import ( "github.com/rs/zerolog/log" ) -type WatchConfigDirectoryCloser func() error +type fileHandler func(fileContent []byte, appConfig *config.ApplicationConfig) error -func ReadApiKeysJson(configDir string, appConfig *config.ApplicationConfig) error { - fileContent, err := os.ReadFile(path.Join(configDir, "api_keys.json")) - if err == nil { - // Parse JSON content from the file - var fileKeys []string - err := json.Unmarshal(fileContent, &fileKeys) - if err == nil { - appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...) 
-			return nil
-		}
-		return err
-	}
-	return err
+type configFileHandler struct {
+	handlers map[string]fileHandler
+
+	watcher *fsnotify.Watcher
+
+	configDir string
+	appConfig *config.ApplicationConfig
 }

-func ReadExternalBackendsJson(configDir string, appConfig *config.ApplicationConfig) error {
-	fileContent, err := os.ReadFile(path.Join(configDir, "external_backends.json"))
-	if err != nil {
-		return err
+// TODO: This should be a singleton eventually so other parts of the code can register config file handlers,
+// then we can export it to other packages
+func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler {
+	c := configFileHandler{
+		handlers:  make(map[string]fileHandler),
+		configDir: appConfig.DynamicConfigsDir,
+		appConfig: appConfig,
 	}
-	// Parse JSON content from the file
-	var fileBackends map[string]string
-	err = json.Unmarshal(fileContent, &fileBackends)
-	if err != nil {
-		return err
+	c.Register("api_keys.json", readApiKeysJson(*appConfig), true)
+	c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true)
+	return c
+}
+
+func (c *configFileHandler) Register(filename string, handler fileHandler, runNow bool) error {
+	_, ok := c.handlers[filename]
+	if ok {
+		return fmt.Errorf("handler already registered for file %s", filename)
 	}
-	err = mergo.Merge(&appConfig.ExternalGRPCBackends, fileBackends)
-	if err != nil {
-		return err
+	c.handlers[filename] = handler
+	if runNow {
+		c.callHandler(path.Join(c.appConfig.DynamicConfigsDir, filename), handler)
 	}
 	return nil
 }

-var CONFIG_FILE_UPDATES = map[string]func(configDir string, appConfig *config.ApplicationConfig) error{
-	"api_keys.json":          ReadApiKeysJson,
-	"external_backends.json": ReadExternalBackendsJson,
+func (c *configFileHandler) callHandler(filename string, handler fileHandler) {
+	fileContent, err := os.ReadFile(filename)
+	if err != nil && !os.IsNotExist(err) {
+		log.Error().Err(err).Str("filename", filename).Msg("could not read file")
+	}
+
+	if err = handler(fileContent, c.appConfig); err != nil {
+		log.Error().Err(err).Msg("WatchConfigDirectory goroutine failed to update options")
+	}
 }

-func WatchConfigDirectory(configDir string, appConfig *config.ApplicationConfig) (WatchConfigDirectoryCloser, error) {
-	if len(configDir) == 0 {
-		return nil, fmt.Errorf("configDir blank")
-	}
+func (c *configFileHandler) Watch() error {
 	configWatcher, err := fsnotify.NewWatcher()
+	c.watcher = configWatcher
 	if err != nil {
-		log.Fatal().Msgf("Unable to create a watcher for the LocalAI Configuration Directory: %+v", err)
-	}
-	ret := func() error {
-		configWatcher.Close()
-		return nil
+		log.Fatal().Err(err).Str("configdir", c.configDir).Msg("unable to create a watcher for configuration directory")
 	}

 	// Start listening for events.
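	// Editor's note: an illustrative sketch of the intended call pattern (the
	// handler name below is hypothetical; the real wiring lives in
	// newConfigFileHandler and core/startup/startup.go):
	//
	//	ch := newConfigFileHandler(appConfig)
	//	_ = ch.Register("my_settings.json", mySettingsHandler, true) // runNow applies it once immediately
	//	if err := ch.Watch(); err != nil {
	//		log.Error().Err(err).Msg("failed to watch the configuration directory")
	//	}
	//
	// The goroutine below then re-runs the matching handler on every
	// Write/Create/Remove event for a registered file.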
go func() { for { select { - case event, ok := <-configWatcher.Events: + case event, ok := <-c.watcher.Events: if !ok { return } - if event.Has(fsnotify.Write) { - for targetName, watchFn := range CONFIG_FILE_UPDATES { - if event.Name == targetName { - err := watchFn(configDir, appConfig) - log.Warn().Msgf("WatchConfigDirectory goroutine for %s: failed to update options: %+v", targetName, err) - } + if event.Has(fsnotify.Write | fsnotify.Create | fsnotify.Remove) { + handler, ok := c.handlers[path.Base(event.Name)] + if !ok { + continue } + + c.callHandler(event.Name, handler) } - case _, ok := <-configWatcher.Errors: + case err, ok := <-c.watcher.Errors: + log.Error().Err(err).Msg("config watcher error received") if !ok { return } - log.Error().Err(err).Msg("error encountered while watching config directory") } } }() // Add a path. - err = configWatcher.Add(configDir) + err = c.watcher.Add(c.appConfig.DynamicConfigsDir) if err != nil { - return ret, fmt.Errorf("unable to establish watch on the LocalAI Configuration Directory: %+v", err) + return fmt.Errorf("unable to establish watch on the LocalAI Configuration Directory: %+v", err) } - return ret, nil + return nil +} + +// TODO: When we institute graceful shutdown, this should be called +func (c *configFileHandler) Stop() { + c.watcher.Close() +} + +func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler { + handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error { + log.Debug().Msg("processing api_keys.json") + + if len(fileContent) > 0 { + // Parse JSON content from the file + var fileKeys []string + err := json.Unmarshal(fileContent, &fileKeys) + if err != nil { + return err + } + + appConfig.ApiKeys = append(startupAppConfig.ApiKeys, fileKeys...) + } else { + appConfig.ApiKeys = startupAppConfig.ApiKeys + } + log.Debug().Msg("api keys loaded from api_keys.json") + return nil + } + + return handler +} + +func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHandler { + handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error { + log.Debug().Msg("processing external_backends.json") + + if len(fileContent) > 0 { + // Parse JSON content from the file + var fileBackends map[string]string + err := json.Unmarshal(fileContent, &fileBackends) + if err != nil { + return err + } + appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends + err = mergo.Merge(&appConfig.ExternalGRPCBackends, &fileBackends) + if err != nil { + return err + } + } else { + appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends + } + log.Debug().Msg("external backends loaded from external_backends.json") + return nil + } + return handler } diff --git a/core/startup/startup.go b/core/startup/startup.go index 6298f034..af92f0e1 100644 --- a/core/startup/startup.go +++ b/core/startup/startup.go @@ -125,6 +125,11 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode }() } + // Watch the configuration directory + // If the directory does not exist, we don't watch it + configHandler := newConfigFileHandler(options) + configHandler.Watch() + log.Info().Msg("core/startup process completed!") return cl, ml, options, nil } From f9c75d487851749d3b382f64bb3d8a9bf52d94dd Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 18 Apr 2024 10:57:24 +0200 Subject: [PATCH 0323/2895] tests: add template tests (#2063) Signed-off-by: Ettore Di Giacinto --- pkg/model/loader_test.go | 105 ++++++++++++++++++++++++++++++++++ 
pkg/model/model_suite_test.go | 13 +++++ 2 files changed, 118 insertions(+) create mode 100644 pkg/model/loader_test.go create mode 100644 pkg/model/model_suite_test.go diff --git a/pkg/model/loader_test.go b/pkg/model/loader_test.go new file mode 100644 index 00000000..4c3c1a88 --- /dev/null +++ b/pkg/model/loader_test.go @@ -0,0 +1,105 @@ +package model_test + +import ( + "github.com/go-skynet/LocalAI/pkg/model" + . "github.com/go-skynet/LocalAI/pkg/model" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +const chatML = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} +{{- if .FunctionCall }} + +{{- else if eq .RoleName "tool" }} + +{{- end }} +{{- if .Content}} +{{.Content }} +{{- end }} +{{- if .FunctionCall}} +{{toJson .FunctionCall}} +{{- end }} +{{- if .FunctionCall }} + +{{- else if eq .RoleName "tool" }} + +{{- end }} +<|im_end|>` + +var testMatch map[string]map[string]interface{} = map[string]map[string]interface{}{ + "user": { + "template": chatML, + "expected": "<|im_start|>user\nA long time ago in a galaxy far, far away...\n<|im_end|>", + "data": model.ChatMessageTemplateData{ + SystemPrompt: "", + Role: "user", + RoleName: "user", + Content: "A long time ago in a galaxy far, far away...", + FunctionCall: nil, + FunctionName: "", + LastMessage: false, + Function: false, + MessageIndex: 0, + }, + }, + "assistant": { + "template": chatML, + "expected": "<|im_start|>assistant\nA long time ago in a galaxy far, far away...\n<|im_end|>", + "data": model.ChatMessageTemplateData{ + SystemPrompt: "", + Role: "assistant", + RoleName: "assistant", + Content: "A long time ago in a galaxy far, far away...", + FunctionCall: nil, + FunctionName: "", + LastMessage: false, + Function: false, + MessageIndex: 0, + }, + }, + "function_call": { + "template": chatML, + "expected": "<|im_start|>assistant\n\n{\"function\":\"test\"}\n\n<|im_end|>", + "data": model.ChatMessageTemplateData{ + SystemPrompt: "", + Role: "assistant", + RoleName: "assistant", + Content: "", + FunctionCall: map[string]string{"function": "test"}, + FunctionName: "", + LastMessage: false, + Function: false, + MessageIndex: 0, + }, + }, + "function_response": { + "template": chatML, + "expected": "<|im_start|>tool\n\nResponse from tool\n\n<|im_end|>", + "data": model.ChatMessageTemplateData{ + SystemPrompt: "", + Role: "tool", + RoleName: "tool", + Content: "Response from tool", + FunctionCall: nil, + FunctionName: "", + LastMessage: false, + Function: false, + MessageIndex: 0, + }, + }, +} + +var _ = Describe("Templates", func() { + Context("chat message", func() { + modelLoader := NewModelLoader("") + for key := range testMatch { + foo := testMatch[key] + It("renders correctly "+key, func() { + templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData)) + Expect(err).ToNot(HaveOccurred()) + Expect(templated).To(Equal(foo["expected"]), templated) + }) + } + }) +}) diff --git a/pkg/model/model_suite_test.go b/pkg/model/model_suite_test.go new file mode 100644 index 00000000..6fa9c004 --- /dev/null +++ b/pkg/model/model_suite_test.go @@ -0,0 +1,13 @@ +package model_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +func TestModel(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "LocalAI model test") +} From 8f2681f90420b4818ee270b4ad7c570ed462b09c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 17:17:33 +0000 Subject: [PATCH 0324/2895] build(deps): bump aiohttp from 3.9.2 to 3.9.4 in /examples/langchain/langchainpy-localai-example in the pip group across 1 directory (#2067) build(deps): bump aiohttp Bumps the pip group with 1 update in the /examples/langchain/langchainpy-localai-example directory: [aiohttp](https://github.com/aio-libs/aiohttp). Updates `aiohttp` from 3.9.2 to 3.9.4 - [Release notes](https://github.com/aio-libs/aiohttp/releases) - [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst) - [Commits](https://github.com/aio-libs/aiohttp/compare/v3.9.2...v3.9.4) --- updated-dependencies: - dependency-name: aiohttp dependency-type: direct:production dependency-group: pip ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain/langchainpy-localai-example/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index 1e63b0bf..ba7f8429 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -1,4 +1,4 @@ -aiohttp==3.9.2 +aiohttp==3.9.4 aiosignal==1.3.1 async-timeout==4.0.2 attrs==23.1.0 From 13012cfa70d8440a78d3a9c88500597c8cc8ed98 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Thu, 18 Apr 2024 15:19:36 -0500 Subject: [PATCH 0325/2895] feat: better control of GRPC docker cache (#2070) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- .github/workflows/generate_grpc_cache.yaml | 90 ++++++++++++++++++++++ .github/workflows/image-pr.yml | 9 ++- .github/workflows/image.yml | 22 ++++-- .github/workflows/image_build.yml | 15 ++-- Dockerfile | 3 +- 5 files changed, 126 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/generate_grpc_cache.yaml diff --git a/.github/workflows/generate_grpc_cache.yaml b/.github/workflows/generate_grpc_cache.yaml new file mode 100644 index 00000000..11abc10a --- /dev/null +++ b/.github/workflows/generate_grpc_cache.yaml @@ -0,0 +1,90 @@ +name: 'generate and publish GRPC docker caches' + +on: +- workflow_dispatch + +concurrency: + group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }} + cancel-in-progress: true + +jobs: + generate_caches: + strategy: + matrix: + include: + - grpc-base-image: ubuntu:22.04 + runs-on: 'ubuntu-latest' + platforms: 'linux/amd64' + runs-on: ${{matrix.runs-on}} + steps: + - name: Release space from worker + if: matrix.runs-on == 'ubuntu-latest' + run: | + echo "Listing top largest packages" + pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) + head -n 30 <<< "${pkgs}" + echo + df -h + echo + sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true + sudo apt-get remove --auto-remove android-sdk-platform-tools || true + sudo apt-get purge --auto-remove android-sdk-platform-tools || true + sudo rm -rf /usr/local/lib/android + sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true + sudo rm -rf /usr/share/dotnet + sudo 
apt-get remove -y '^mono-.*' || true + sudo apt-get remove -y '^ghc-.*' || true + sudo apt-get remove -y '.*jdk.*|.*jre.*' || true + sudo apt-get remove -y 'php.*' || true + sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true + sudo apt-get remove -y '^google-.*' || true + sudo apt-get remove -y azure-cli || true + sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true + sudo apt-get remove -y '^gfortran-.*' || true + sudo apt-get remove -y microsoft-edge-stable || true + sudo apt-get remove -y firefox || true + sudo apt-get remove -y powershell || true + sudo apt-get remove -y r-base-core || true + sudo apt-get autoremove -y + sudo apt-get clean + echo + echo "Listing top largest packages" + pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) + head -n 30 <<< "${pkgs}" + echo + sudo rm -rfv build || true + sudo rm -rf /usr/share/dotnet || true + sudo rm -rf /opt/ghc || true + sudo rm -rf "/usr/local/share/boost" || true + sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true + df -h + + - name: Set up QEMU + uses: docker/setup-qemu-action@master + with: + platforms: all + + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@master + + - name: Checkout + uses: actions/checkout@v4 + + - name: Cache GRPC + uses: docker/build-push-action@v5 + with: + builder: ${{ steps.buildx.outputs.name }} + # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache. + # This means that even the MAKEFLAGS have to be an EXACT match. + # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch. + build-args: | + GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }} + MAKEFLAGS=--jobs=4 --output-sync=target + GRPC_VERSION=v1.58.0 + context: . 
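+          # Note: this workflow only seeds the shared GitHub Actions cache with
+          # the `grpc` stage (see cache-to below); no image is pushed. Builds
+          # that want to reuse the cache are assumed to restore it with a
+          # matching `cache-from: type=gha` and byte-identical build-args.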
+ file: ./Dockerfile + cache-to: type=gha,ignore-error=true + target: grpc + platforms: ${{ matrix.platforms }} + push: false \ No newline at end of file diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index b703b16d..9c4fece7 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -22,6 +22,7 @@ jobs: platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} + grpc-base-image: ${{ matrix.grpc-base-image }} makeflags: ${{ matrix.makeflags }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} @@ -61,12 +62,14 @@ jobs: ffmpeg: 'false' image-type: 'extras' base-image: "rocm/dev-ubuntu-22.04:6.0-complete" + grpc-base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' makeflags: "--jobs=3 --output-sync=target" - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + grpc-base-image: "ubuntu:22.04" tag-suffix: 'sycl-f16-ffmpeg' ffmpeg: 'true' image-type: 'extras' @@ -85,6 +88,7 @@ jobs: platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} + grpc-base-image: ${{ matrix.grpc-base-image }} makeflags: ${{ matrix.makeflags }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} @@ -102,11 +106,12 @@ jobs: image-type: 'core' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" - makeflags: "--jobs=5 --output-sync=target" + makeflags: "--jobs=4 --output-sync=target" - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + grpc-base-image: "ubuntu:22.04" tag-suffix: 'sycl-f16-ffmpeg-core' ffmpeg: 'true' image-type: 'core' @@ -122,4 +127,4 @@ jobs: image-type: 'core' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" - makeflags: "--jobs=5 --output-sync=target" \ No newline at end of file + makeflags: "--jobs=4 --output-sync=target" \ No newline at end of file diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index d2607579..255c1c65 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -26,6 +26,7 @@ jobs: platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} + grpc-base-image: ${{ matrix.grpc-base-image }} aio: ${{ matrix.aio }} makeflags: ${{ matrix.makeflags }} latest-image: ${{ matrix.latest-image }} @@ -129,6 +130,7 @@ jobs: image-type: 'extras' aio: "-aio-gpu-hipblas" base-image: "rocm/dev-ubuntu-22.04:6.0-complete" + grpc-base-image: "ubuntu:22.04" latest-image: 'latest-gpu-hipblas' latest-image-aio: 'latest-aio-gpu-hipblas' runs-on: 'arc-runner-set' @@ -140,12 +142,14 @@ jobs: ffmpeg: 'false' image-type: 'extras' base-image: "rocm/dev-ubuntu-22.04:6.0-complete" + grpc-base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' makeflags: "--jobs=3 --output-sync=target" - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'auto' base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f16-ffmpeg' ffmpeg: 'true' image-type: 'extras' @@ -158,6 +162,7 @@ jobs: platforms: 'linux/amd64' tag-latest: 'auto' base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f32-ffmpeg' ffmpeg: 'true' image-type: 'extras' @@ -171,6 +176,7 @@ jobs: platforms: 'linux/amd64' tag-latest: 'false' base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + grpc-base-image: "ubuntu:22.04" 
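+          # grpc-base-image is presumably pinned to plain ubuntu:22.04 on the
+          # hipblas/sycl variants so that all images can share one prebuilt
+          # GRPC cache; Ubuntu-based images simply fall back to base-image
+          # (see the `grpc-base-image || base-image` default in image_build.yml).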
tag-suffix: '-sycl-f16-core' ffmpeg: 'false' image-type: 'core' @@ -180,6 +186,7 @@ jobs: platforms: 'linux/amd64' tag-latest: 'false' base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f32-core' ffmpeg: 'false' image-type: 'core' @@ -189,6 +196,7 @@ jobs: platforms: 'linux/amd64' tag-latest: 'false' base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f16-ffmpeg-core' ffmpeg: 'true' image-type: 'core' @@ -198,6 +206,7 @@ jobs: platforms: 'linux/amd64' tag-latest: 'false' base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f32-ffmpeg-core' ffmpeg: 'true' image-type: 'core' @@ -210,6 +219,7 @@ jobs: ffmpeg: 'true' image-type: 'core' base-image: "rocm/dev-ubuntu-22.04:6.0-complete" + grpc-base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' makeflags: "--jobs=3 --output-sync=target" - build-type: 'hipblas' @@ -219,6 +229,7 @@ jobs: ffmpeg: 'false' image-type: 'core' base-image: "rocm/dev-ubuntu-22.04:6.0-complete" + grpc-base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' makeflags: "--jobs=3 --output-sync=target" @@ -236,6 +247,7 @@ jobs: runs-on: ${{ matrix.runs-on }} aio: ${{ matrix.aio }} base-image: ${{ matrix.base-image }} + grpc-base-image: ${{ matrix.grpc-base-image }} makeflags: ${{ matrix.makeflags }} latest-image: ${{ matrix.latest-image }} latest-image-aio: ${{ matrix.latest-image-aio }} @@ -258,7 +270,7 @@ jobs: aio: "-aio-cpu" latest-image: 'latest-cpu' latest-image-aio: 'latest-aio-cpu' - makeflags: "--jobs=5 --output-sync=target" + makeflags: "--jobs=4 --output-sync=target" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -269,7 +281,7 @@ jobs: image-type: 'core' base-image: "ubuntu:22.04" runs-on: 'ubuntu-latest' - makeflags: "--jobs=5 --output-sync=target" + makeflags: "--jobs=4 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" @@ -280,7 +292,7 @@ jobs: image-type: 'core' base-image: "ubuntu:22.04" runs-on: 'ubuntu-latest' - makeflags: "--jobs=5 --output-sync=target" + makeflags: "--jobs=4 --output-sync=target" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -291,7 +303,7 @@ jobs: image-type: 'core' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" - makeflags: "--jobs=5 --output-sync=target" + makeflags: "--jobs=4 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" @@ -302,4 +314,4 @@ jobs: image-type: 'core' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" - makeflags: "--jobs=5 --output-sync=target" + makeflags: "--jobs=4 --output-sync=target" diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index b0684a4c..b06100ff 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -6,6 +6,10 @@ on: inputs: base-image: description: 'Base image' + required: true + type: string + grpc-base-image: + description: 'GRPC Base image, must be a compatible image with base-image' required: false default: '' type: string @@ -57,7 +61,7 @@ on: makeflags: description: 'Make Flags' required: false - default: '--jobs=3 --output-sync=target' + default: '--jobs=4 --output-sync=target' type: string aio: description: 'AIO Image Name' @@ -201,15 +205,16 @@ jobs: uses: docker/build-push-action@v5 with: builder: ${{ steps.buildx.outputs.name }} + # The build-args MUST be an EXACT match between the image cache 
and other workflow steps that want to use that cache. + # This means that even the MAKEFLAGS have to be an EXACT match. + # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch. build-args: | - IMAGE_TYPE=${{ inputs.image-type }} - BASE_IMAGE=${{ inputs.base-image }} - MAKEFLAGS=${{ inputs.makeflags }} + GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} + MAKEFLAGS=--jobs=4 --output-sync=target GRPC_VERSION=v1.58.0 context: . file: ./Dockerfile cache-from: type=gha - cache-to: type=gha,ignore-error=true target: grpc platforms: ${{ inputs.platforms }} push: false diff --git a/Dockerfile b/Dockerfile index 397fbe22..805ac3a6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,6 @@ ARG IMAGE_TYPE=extras ARG BASE_IMAGE=ubuntu:22.04 +ARG GRPC_BASE_IMAGE=${BASE_IMAGE} # extras or core FROM ${BASE_IMAGE} as requirements-core @@ -104,7 +105,7 @@ RUN if [ ! -e /usr/bin/python ]; then \ ################################### ################################### -FROM ${BASE_IMAGE} as grpc +FROM ${GRPC_BASE_IMAGE} as grpc ARG MAKEFLAGS ARG GRPC_VERSION=v1.58.0 From bbea62b907db917b8ad7036d06b828da48269bf8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 18 Apr 2024 22:43:12 +0200 Subject: [PATCH 0326/2895] feat(functions): support models with no grammar, add tests (#2068) Signed-off-by: Ettore Di Giacinto --- core/config/backend_config.go | 10 +- core/http/endpoints/openai/chat.go | 131 ++++++------------ core/http/endpoints/openai/completion.go | 4 +- core/http/endpoints/openai/request.go | 4 +- core/schema/openai.go | 14 +- pkg/{grammar => functions}/functions.go | 2 +- .../functions_suite_test.go} | 2 +- pkg/{grammar => functions}/functions_test.go | 4 +- .../grammar_json_schema.go} | 2 +- .../grammar_json_schema_test.go} | 4 +- pkg/functions/parse.go | 108 +++++++++++++++ pkg/functions/parse_test.go | 85 ++++++++++++ pkg/model/loader.go | 4 +- 13 files changed, 255 insertions(+), 119 deletions(-) rename pkg/{grammar => functions}/functions.go (98%) rename pkg/{grammar/grammar_suite_test.go => functions/functions_suite_test.go} (90%) rename pkg/{grammar => functions}/functions_test.go (96%) rename pkg/{grammar/json_schema.go => functions/grammar_json_schema.go} (99%) rename pkg/{grammar/json_schema_test.go => functions/grammar_json_schema_test.go} (98%) create mode 100644 pkg/functions/parse.go create mode 100644 pkg/functions/parse_test.go diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 81c92d01..1161cf9f 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -12,6 +12,7 @@ import ( "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/downloader" + "github.com/go-skynet/LocalAI/pkg/functions" "github.com/go-skynet/LocalAI/pkg/utils" "github.com/rs/zerolog/log" "gopkg.in/yaml.v3" @@ -39,7 +40,7 @@ type BackendConfig struct { InputToken [][]int `yaml:"-"` functionCallString, functionCallNameString string `yaml:"-"` - FunctionsConfig Functions `yaml:"function"` + FunctionsConfig functions.FunctionsConfig `yaml:"function"` FeatureFlag FeatureFlag `yaml:"feature_flags"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early. // LLM configs (GPT4ALL, Llama.cpp, ...) 
@@ -157,13 +158,6 @@ type AutoGPTQ struct { UseFastTokenizer bool `yaml:"use_fast_tokenizer"` } -type Functions struct { - DisableNoAction bool `yaml:"disable_no_action"` - NoActionFunctionName string `yaml:"no_action_function_name"` - NoActionDescriptionName string `yaml:"no_action_description_name"` - ParallelCalls bool `yaml:"parallel_calls"` -} - type TemplateConfig struct { Chat string `yaml:"chat"` ChatMessage string `yaml:"chat_message"` diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 36d1142b..9adba8ea 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -11,9 +11,8 @@ import ( "github.com/go-skynet/LocalAI/core/backend" "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/grammar" + "github.com/go-skynet/LocalAI/pkg/functions" model "github.com/go-skynet/LocalAI/pkg/model" - "github.com/go-skynet/LocalAI/pkg/utils" "github.com/gofiber/fiber/v2" "github.com/google/uuid" "github.com/rs/zerolog/log" @@ -68,8 +67,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup return true }) - results := parseFunctionCall(result, config.FunctionsConfig.ParallelCalls) - noActionToRun := len(results) > 0 && results[0].name == noAction + results := functions.ParseFunctionCall(result, config.FunctionsConfig) + noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0 switch { case noActionToRun: @@ -82,7 +81,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } responses <- initialMessage - result, err := handleQuestion(config, req, ml, startupOptions, results[0].arguments, prompt) + result, err := handleQuestion(config, req, ml, startupOptions, results, prompt) if err != nil { log.Error().Err(err).Msg("error handling question") return @@ -105,7 +104,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup default: for i, ss := range results { - name, args := ss.name, ss.arguments + name, args := ss.Name, ss.Arguments initialMessage := schema.OpenAIResponse{ ID: id, @@ -156,8 +155,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } return func(c *fiber.Ctx) error { - processFunctions := false - funcs := grammar.Functions{} modelFile, input, err := readRequest(c, ml, startupOptions, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) @@ -169,6 +166,9 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } log.Debug().Msgf("Configuration read: %+v", config) + funcs := input.Functions + shouldUseFn := len(input.Functions) > 0 && config.ShouldUseFunctions() + // Allow the user to set custom actions via config file // to be "embedded" in each model noActionName := "answer" @@ -182,18 +182,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } if input.ResponseFormat.Type == "json_object" { - input.Grammar = grammar.JSONBNF + input.Grammar = functions.JSONBNF } config.Grammar = input.Grammar - // process functions if we have any defined or if we have a function call string - if len(input.Functions) > 0 && config.ShouldUseFunctions() { + if shouldUseFn { log.Debug().Msgf("Response needs to process functions") + } - processFunctions = true - - noActionGrammar := grammar.Function{ + switch { + case !config.FunctionsConfig.NoGrammar && shouldUseFn: + noActionGrammar := functions.Function{ Name: 
noActionName, Description: noActionDescription, Parameters: map[string]interface{}{ @@ -206,7 +206,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } // Append the no action function - funcs = append(funcs, input.Functions...) if !config.FunctionsConfig.DisableNoAction { funcs = append(funcs, noActionGrammar) } @@ -219,10 +218,17 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup // Update input grammar jsStruct := funcs.ToJSONStructure() config.Grammar = jsStruct.Grammar("", config.FunctionsConfig.ParallelCalls) - } else if input.JSONFunctionGrammarObject != nil { + case input.JSONFunctionGrammarObject != nil: config.Grammar = input.JSONFunctionGrammarObject.Grammar("", config.FunctionsConfig.ParallelCalls) + default: + // Force picking one of the functions by the request + if config.FunctionToCall() != "" { + funcs = funcs.Select(config.FunctionToCall()) + } } + // process functions if we have any defined or if we have a function call string + // functions are not supported in stream mode (yet?) toStream := input.Stream @@ -232,8 +238,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup // If we are using the tokenizer template, we don't need to process the messages // unless we are processing functions - if !config.TemplateConfig.UseTokenizerTemplate || processFunctions { - + if !config.TemplateConfig.UseTokenizerTemplate || shouldUseFn { suppressConfigSystemPrompt := false mess := []string{} for messageIndex, i := range input.Messages { @@ -346,11 +351,11 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup templateFile = config.Model } - if config.TemplateConfig.Chat != "" && !processFunctions { + if config.TemplateConfig.Chat != "" && !shouldUseFn { templateFile = config.TemplateConfig.Chat } - if config.TemplateConfig.Functions != "" && processFunctions { + if config.TemplateConfig.Functions != "" && shouldUseFn { templateFile = config.TemplateConfig.Functions } @@ -370,7 +375,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } log.Debug().Msgf("Prompt (after templating): %s", predInput) - if processFunctions { + if shouldUseFn && config.Grammar != "" { log.Debug().Msgf("Grammar: %+v", config.Grammar) } } @@ -388,7 +393,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup responses := make(chan schema.OpenAIResponse) - if !processFunctions { + if !shouldUseFn { go process(predInput, input, config, ml, responses) } else { go processTools(noActionName, predInput, input, config, ml, responses) @@ -446,18 +451,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup // no streaming mode default: result, tokenUsage, err := ComputeChoices(input, predInput, config, startupOptions, ml, func(s string, c *[]schema.Choice) { - if !processFunctions { + if !shouldUseFn { // no function is called, just reply and use stop as finish reason *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}}) return } - results := parseFunctionCall(s, config.FunctionsConfig.ParallelCalls) - noActionsToRun := len(results) > 0 && results[0].name == noActionName + results := functions.ParseFunctionCall(s, config.FunctionsConfig) + noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0 switch { case noActionsToRun: - result, err := handleQuestion(config, input, ml, startupOptions, 
results[0].arguments, predInput) + result, err := handleQuestion(config, input, ml, startupOptions, results, predInput) if err != nil { log.Error().Err(err).Msg("error handling question") return @@ -476,7 +481,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } for _, ss := range results { - name, args := ss.name, ss.arguments + name, args := ss.Name, ss.Arguments if len(input.Tools) > 0 { // If we are using tools, we condense the function calls into // a single response choice with all the tools @@ -534,16 +539,20 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup // Return the prediction in the response body return c.JSON(resp) } - } } -func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, args, prompt string) (string, error) { +func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, funcResults []functions.FuncCallResults, prompt string) (string, error) { log.Debug().Msgf("nothing to do, computing a reply") - + arg := "" + if len(funcResults) > 0 { + arg = funcResults[0].Arguments + } // If there is a message that the LLM already sends as part of the JSON reply, use it arguments := map[string]interface{}{} - json.Unmarshal([]byte(args), &arguments) + if err := json.Unmarshal([]byte(arg), &arguments); err != nil { + log.Debug().Msg("handleQuestion: function result did not contain a valid JSON object") + } m, exists := arguments["message"] if exists { switch message := m.(type) { @@ -580,63 +589,3 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m } return backend.Finetune(*config, prompt, prediction.Response), nil } - -type funcCallResults struct { - name string - arguments string -} - -func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults { - results := []funcCallResults{} - - // TODO: use generics to avoid this code duplication - if multipleResults { - ss := []map[string]interface{}{} - s := utils.EscapeNewLines(llmresult) - json.Unmarshal([]byte(s), &ss) - log.Debug().Msgf("Function return: %s %+v", s, ss) - - for _, s := range ss { - func_name, ok := s["function"] - if !ok { - continue - } - args, ok := s["arguments"] - if !ok { - continue - } - d, _ := json.Marshal(args) - funcName, ok := func_name.(string) - if !ok { - continue - } - results = append(results, funcCallResults{name: funcName, arguments: string(d)}) - } - } else { - // As we have to change the result before processing, we can't stream the answer token-by-token (yet?) 
- ss := map[string]interface{}{} - // This prevent newlines to break JSON parsing for clients - s := utils.EscapeNewLines(llmresult) - json.Unmarshal([]byte(s), &ss) - log.Debug().Msgf("Function return: %s %+v", s, ss) - - // The grammar defines the function name as "function", while OpenAI returns "name" - func_name, ok := ss["function"] - if !ok { - return results - } - // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object - args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix) - if !ok { - return results - } - d, _ := json.Marshal(args) - funcName, ok := func_name.(string) - if !ok { - return results - } - results = append(results, funcCallResults{name: funcName, arguments: string(d)}) - } - - return results -} diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index 69923475..bcd46db5 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -12,7 +12,7 @@ import ( "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/grammar" + "github.com/go-skynet/LocalAI/pkg/functions" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/google/uuid" @@ -70,7 +70,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a } if input.ResponseFormat.Type == "json_object" { - input.Grammar = grammar.JSONBNF + input.Grammar = functions.JSONBNF } config.Grammar = input.Grammar diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go index 369fb0b8..9a107bab 100644 --- a/core/http/endpoints/openai/request.go +++ b/core/http/endpoints/openai/request.go @@ -12,7 +12,7 @@ import ( "github.com/go-skynet/LocalAI/core/config" fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/grammar" + "github.com/go-skynet/LocalAI/pkg/functions" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" @@ -145,7 +145,7 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque } if input.ToolsChoice != nil { - var toolChoice grammar.Tool + var toolChoice functions.Tool switch content := input.ToolsChoice.(type) { case string: diff --git a/core/schema/openai.go b/core/schema/openai.go index 6aa0f1b0..a251ba68 100644 --- a/core/schema/openai.go +++ b/core/schema/openai.go @@ -3,7 +3,7 @@ package schema import ( "context" - "github.com/go-skynet/LocalAI/pkg/grammar" + functions "github.com/go-skynet/LocalAI/pkg/functions" ) // APIError provides error information returned by the OpenAI API. 
@@ -108,7 +108,7 @@ type ChatCompletionResponseFormat struct { type OpenAIRequest struct { PredictionOptions - Context context.Context `json:"-"` + Context context.Context `json:"-"` Cancel context.CancelFunc `json:"-"` // whisper @@ -130,11 +130,11 @@ type OpenAIRequest struct { Messages []Message `json:"messages" yaml:"messages"` // A list of available functions to call - Functions []grammar.Function `json:"functions" yaml:"functions"` - FunctionCall interface{} `json:"function_call" yaml:"function_call"` // might be a string or an object + Functions functions.Functions `json:"functions" yaml:"functions"` + FunctionCall interface{} `json:"function_call" yaml:"function_call"` // might be a string or an object - Tools []grammar.Tool `json:"tools,omitempty" yaml:"tools"` - ToolsChoice interface{} `json:"tool_choice,omitempty" yaml:"tool_choice"` + Tools []functions.Tool `json:"tools,omitempty" yaml:"tools"` + ToolsChoice interface{} `json:"tool_choice,omitempty" yaml:"tool_choice"` Stream bool `json:"stream"` @@ -145,7 +145,7 @@ type OpenAIRequest struct { // A grammar to constrain the LLM output Grammar string `json:"grammar" yaml:"grammar"` - JSONFunctionGrammarObject *grammar.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"` + JSONFunctionGrammarObject *functions.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"` Backend string `json:"backend" yaml:"backend"` diff --git a/pkg/grammar/functions.go b/pkg/functions/functions.go similarity index 98% rename from pkg/grammar/functions.go rename to pkg/functions/functions.go index 1038f5e6..d75a2ee3 100644 --- a/pkg/grammar/functions.go +++ b/pkg/functions/functions.go @@ -1,4 +1,4 @@ -package grammar +package functions import ( "encoding/json" diff --git a/pkg/grammar/grammar_suite_test.go b/pkg/functions/functions_suite_test.go similarity index 90% rename from pkg/grammar/grammar_suite_test.go rename to pkg/functions/functions_suite_test.go index 652643b6..8964b1c8 100644 --- a/pkg/grammar/grammar_suite_test.go +++ b/pkg/functions/functions_suite_test.go @@ -1,4 +1,4 @@ -package grammar +package functions import ( "testing" diff --git a/pkg/grammar/functions_test.go b/pkg/functions/functions_test.go similarity index 96% rename from pkg/grammar/functions_test.go rename to pkg/functions/functions_test.go index 6e8a56ed..97953a5e 100644 --- a/pkg/grammar/functions_test.go +++ b/pkg/functions/functions_test.go @@ -1,7 +1,7 @@ -package grammar_test +package functions_test import ( - . "github.com/go-skynet/LocalAI/pkg/grammar" + . "github.com/go-skynet/LocalAI/pkg/functions" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) diff --git a/pkg/grammar/json_schema.go b/pkg/functions/grammar_json_schema.go similarity index 99% rename from pkg/grammar/json_schema.go rename to pkg/functions/grammar_json_schema.go index 76f9778f..01046390 100644 --- a/pkg/grammar/json_schema.go +++ b/pkg/functions/grammar_json_schema.go @@ -1,4 +1,4 @@ -package grammar +package functions // a golang port of https://github.com/ggerganov/llama.cpp/pull/1887 diff --git a/pkg/grammar/json_schema_test.go b/pkg/functions/grammar_json_schema_test.go similarity index 98% rename from pkg/grammar/json_schema_test.go rename to pkg/functions/grammar_json_schema_test.go index 39d2a4d5..fc9029a8 100644 --- a/pkg/grammar/json_schema_test.go +++ b/pkg/functions/grammar_json_schema_test.go @@ -1,9 +1,9 @@ -package grammar_test +package functions_test import ( "strings" - . 
"github.com/go-skynet/LocalAI/pkg/grammar" + . "github.com/go-skynet/LocalAI/pkg/functions" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go new file mode 100644 index 00000000..5324e8c6 --- /dev/null +++ b/pkg/functions/parse.go @@ -0,0 +1,108 @@ +package functions + +import ( + "encoding/json" + "regexp" + + "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/rs/zerolog/log" +) + +type FunctionsConfig struct { + DisableNoAction bool `yaml:"disable_no_action"` + NoActionFunctionName string `yaml:"no_action_function_name"` + NoActionDescriptionName string `yaml:"no_action_description_name"` + ParallelCalls bool `yaml:"parallel_calls"` + NoGrammar bool `yaml:"no_grammar"` + ResponseRegex string `yaml:"response_regex"` +} + +type FuncCallResults struct { + Name string + Arguments string +} + +func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncCallResults { + multipleResults := functionConfig.ParallelCalls + useGrammars := !functionConfig.NoGrammar + + results := []FuncCallResults{} + + // if no grammar is used, we have to extract function and arguments from the result + if !useGrammars { + // the response is a string that we have to parse + + // We use named regexes here to extract the function name and arguments + // obviously, this expects the LLM to be stable and return correctly formatted JSON + // TODO: optimize this and pre-compile it + var respRegex = regexp.MustCompile(functionConfig.ResponseRegex) + match := respRegex.FindStringSubmatch(llmresult) + result := make(map[string]string) + for i, name := range respRegex.SubexpNames() { + if i != 0 && name != "" && len(match) > i { + result[name] = match[i] + } + } + + // TODO: open point about multiple results and/or mixed with chat messages + // This is not handled as for now, we only expect one function call per response + functionName := result["function"] + if functionName == "" { + return results + } + + return append(results, FuncCallResults{Name: result["function"], Arguments: result["arguments"]}) + } + + // with grammars + // TODO: use generics to avoid this code duplication + if multipleResults { + ss := []map[string]interface{}{} + s := utils.EscapeNewLines(llmresult) + json.Unmarshal([]byte(s), &ss) + log.Debug().Msgf("Function return: %s %+v", s, ss) + + for _, s := range ss { + func_name, ok := s["function"] + if !ok { + continue + } + args, ok := s["arguments"] + if !ok { + continue + } + d, _ := json.Marshal(args) + funcName, ok := func_name.(string) + if !ok { + continue + } + results = append(results, FuncCallResults{Name: funcName, Arguments: string(d)}) + } + } else { + // As we have to change the result before processing, we can't stream the answer token-by-token (yet?) 
+		ss := map[string]interface{}{}
+		// This prevents newlines from breaking JSON parsing for clients
+		s := utils.EscapeNewLines(llmresult)
+		json.Unmarshal([]byte(s), &ss)
+		log.Debug().Msgf("Function return: %s %+v", s, ss)
+
+		// The grammar defines the function name as "function", while OpenAI returns "name"
+		func_name, ok := ss["function"]
+		if !ok {
+			return results
+		}
+		// Similarly, while here arguments is a map[string]interface{}, OpenAI actually wants a stringified object
+		args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
+		if !ok {
+			return results
+		}
+		d, _ := json.Marshal(args)
+		funcName, ok := func_name.(string)
+		if !ok {
+			return results
+		}
+		results = append(results, FuncCallResults{Name: funcName, Arguments: string(d)})
+	}
+
+	return results
+}
diff --git a/pkg/functions/parse_test.go b/pkg/functions/parse_test.go
new file mode 100644
index 00000000..5168a7d1
--- /dev/null
+++ b/pkg/functions/parse_test.go
@@ -0,0 +1,85 @@
+package functions_test
+
+import (
+	. "github.com/go-skynet/LocalAI/pkg/functions"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("LocalAI function parse tests", func() {
+	var functionConfig FunctionsConfig
+
+	BeforeEach(func() {
+		// Default configuration setup
+		functionConfig = FunctionsConfig{
+			ParallelCalls: false,
+			NoGrammar:     false,
+			ResponseRegex: `(?P<function>\w+)\s*\((?P<arguments>.*)\)`,
+		}
+	})
+
+	Context("when using grammars and single result expected", func() {
+		It("should parse the function name and arguments correctly", func() {
+			input := `{"function": "add", "arguments": {"x": 5, "y": 3}}`
+			functionConfig.ParallelCalls = false
+			functionConfig.NoGrammar = false
+
+			results := ParseFunctionCall(input, functionConfig)
+			Expect(results).To(HaveLen(1))
+			Expect(results[0].Name).To(Equal("add"))
+			Expect(results[0].Arguments).To(Equal(`{"x":5,"y":3}`))
+		})
+	})
+
+	Context("when not using grammars and regex is needed", func() {
+		It("should extract function name and arguments from the regex", func() {
+			input := `add({"x":5,"y":3})`
+			functionConfig.NoGrammar = true
+
+			results := ParseFunctionCall(input, functionConfig)
+			Expect(results).To(HaveLen(1))
+			Expect(results[0].Name).To(Equal("add"))
+			Expect(results[0].Arguments).To(Equal(`{"x":5,"y":3}`))
+		})
+	})
+
+	Context("when having invalid input", func() {
+		It("returns no results when there is no input", func() {
+			input := ""
+			functionConfig.NoGrammar = true
+
+			results := ParseFunctionCall(input, functionConfig)
+			Expect(results).To(HaveLen(0))
+
+			functionConfig.NoGrammar = false
+
+			results = ParseFunctionCall(input, functionConfig)
+			Expect(results).To(HaveLen(0))
+		})
+		It("returns no results when the input is invalid", func() {
+			input := "invalid input"
+			functionConfig.NoGrammar = true
+
+			results := ParseFunctionCall(input, functionConfig)
+			Expect(results).To(HaveLen(0))
+			functionConfig.NoGrammar = false
+
+			results = ParseFunctionCall(input, functionConfig)
+			Expect(results).To(HaveLen(0))
+		})
+	})
+	Context("when parallel calls are enabled", func() {
+		It("should handle multiple function calls", func() {
+			input := `[{"function": "add", "arguments": {"x": 5, "y": 3}}, {"function": "subtract", "arguments": {"x": 10, "y": 7}}]`
+			functionConfig.ParallelCalls = true
+			functionConfig.NoGrammar = false
+
+			results := ParseFunctionCall(input, functionConfig)
+			Expect(results).To(HaveLen(2))
+			Expect(results[0].Name).To(Equal("add"))
+			Expect(results[0].Arguments).To(Equal(`{"x":5,"y":3}`))
+			Expect(results[1].Name).To(Equal("subtract"))
+			Expect(results[1].Arguments).To(Equal(`{"x":10,"y":7}`))
+		})
+	})
+})
diff --git a/pkg/model/loader.go b/pkg/model/loader.go
index 003d8327..f3182940 100644
--- a/pkg/model/loader.go
+++ b/pkg/model/loader.go
@@ -11,7 +11,7 @@ import (
 	"text/template"
 
 	"github.com/Masterminds/sprig/v3"
-	grammar "github.com/go-skynet/LocalAI/pkg/grammar"
+	"github.com/go-skynet/LocalAI/pkg/functions"
 	"github.com/go-skynet/LocalAI/pkg/grpc"
 	process "github.com/mudler/go-processmanager"
 	"github.com/rs/zerolog/log"
@@ -25,7 +25,7 @@ type PromptTemplateData struct {
 	SuppressSystemPrompt bool // used by chat specifically to indicate that SystemPrompt above should be _ignored_
 	Input                string
 	Instruction          string
-	Functions            []grammar.Function
+	Functions            []functions.Function
 	MessageIndex         int
 }

From e9448005a50bf966248ea34fbc0a63c23a43e4fb Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 18 Apr 2024 23:30:55 +0200
Subject: [PATCH 0327/2895] :arrow_up: Update ggerganov/llama.cpp (#2051)

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index d236f860..e2bfa594 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
 
 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=7593639ce335e8d7f89aa9a54d616951f273af60
+CPPLLAMA_VERSION?=0d56246f4b9764158525d894b96606f6163c53a8
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

From 852316c5a61fa8430299717912a2fd62f23fd572 Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Thu, 18 Apr 2024 19:52:34 -0500
Subject: [PATCH 0328/2895] fix: move the GRPC cache generation workflow into its own concurrency group (#2071)

Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---
 .github/workflows/generate_grpc_cache.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/generate_grpc_cache.yaml b/.github/workflows/generate_grpc_cache.yaml
index 11abc10a..c6b080b5 100644
--- a/.github/workflows/generate_grpc_cache.yaml
+++ b/.github/workflows/generate_grpc_cache.yaml
@@ -4,7 +4,7 @@ on:
 - workflow_dispatch
 
 concurrency:
-  group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }}
+  group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
   cancel-in-progress: true
 
 jobs:

From 27ec84827c40a81663ef4df51c5e9e30bbb458c9 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 19 Apr 2024 04:40:18 +0200
Subject: [PATCH 0329/2895] refactor(template): isolate and add tests (#2069)

* refactor(template): isolate and add tests

Signed-off-by: Ettore Di Giacinto

---------

Signed-off-by: Ettore Di Giacinto
Signed-off-by: Dave
Co-authored-by: Dave
---
 pkg/model/loader.go               | 111 +++++------------------
 pkg/model/loader_test.go          |   7 +-
 pkg/templates/cache.go            | 103 +++++++++++++++++++++
 pkg/templates/cache_test.go       |  73 ++++++++++++++++
 pkg/templates/utils_suite_test.go |  13 ++++
 pkg/utils/path.go                 |   6 ++
 6 files changed, 218 insertions(+), 95 deletions(-)
 create mode 100644 pkg/templates/cache.go
 create mode 100644 pkg/templates/cache_test.go
 create mode 100644 pkg/templates/utils_suite_test.go

diff --git a/pkg/model/loader.go
b/pkg/model/loader.go index f3182940..1b5c9aa0 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -1,18 +1,19 @@ package model import ( - "bytes" "context" "fmt" "os" "path/filepath" "strings" "sync" - "text/template" - "github.com/Masterminds/sprig/v3" + "github.com/go-skynet/LocalAI/pkg/templates" + "github.com/go-skynet/LocalAI/pkg/functions" "github.com/go-skynet/LocalAI/pkg/grpc" + "github.com/go-skynet/LocalAI/pkg/utils" + process "github.com/mudler/go-processmanager" "github.com/rs/zerolog/log" ) @@ -42,21 +43,6 @@ type ChatMessageTemplateData struct { LastMessage bool } -// Keep this in sync with config.TemplateConfig. Is there a more idiomatic way to accomplish this in go? -// Technically, order doesn't _really_ matter, but the count must stay in sync, see tests/integration/reflect_test.go -type TemplateType int - -const ( - ChatPromptTemplate TemplateType = iota - ChatMessageTemplate - CompletionPromptTemplate - EditPromptTemplate - FunctionsPromptTemplate - - // The following TemplateType is **NOT** a valid value and MUST be last. It exists to make the sanity integration tests simpler! - IntegrationTestTemplate -) - // new idea: what if we declare a struct of these here, and use a loop to check? // TODO: Split ModelLoader and TemplateLoader? Just to keep things more organized. Left together to share a mutex until I look into that. Would split if we seperate directories for .bin/.yaml and .tmpl @@ -67,7 +53,7 @@ type ModelLoader struct { grpcClients map[string]grpc.Backend models map[string]ModelAddress grpcProcesses map[string]*process.Process - templates map[TemplateType]map[string]*template.Template + templates *templates.TemplateCache wd *WatchDog } @@ -86,11 +72,10 @@ func NewModelLoader(modelPath string) *ModelLoader { ModelPath: modelPath, grpcClients: make(map[string]grpc.Backend), models: make(map[string]ModelAddress), - templates: make(map[TemplateType]map[string]*template.Template), + templates: templates.NewTemplateCache(modelPath), grpcProcesses: make(map[string]*process.Process), } - nml.initializeTemplateMap() return nml } @@ -99,7 +84,7 @@ func (ml *ModelLoader) SetWatchDog(wd *WatchDog) { } func (ml *ModelLoader) ExistsInModelPath(s string) bool { - return existsInPath(ml.ModelPath, s) + return utils.ExistsInPath(ml.ModelPath, s) } func (ml *ModelLoader) ListModels() ([]string, error) { @@ -194,82 +179,22 @@ func (ml *ModelLoader) CheckIsLoaded(s string) ModelAddress { return "" } -func (ml *ModelLoader) EvaluateTemplateForPrompt(templateType TemplateType, templateName string, in PromptTemplateData) (string, error) { +const ( + ChatPromptTemplate templates.TemplateType = iota + ChatMessageTemplate + CompletionPromptTemplate + EditPromptTemplate + FunctionsPromptTemplate +) + +func (ml *ModelLoader) EvaluateTemplateForPrompt(templateType templates.TemplateType, templateName string, in PromptTemplateData) (string, error) { // TODO: should this check be improved? 
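 	// (ChatMessage templates are rendered from ChatMessageTemplateData rather than
 	// PromptTemplateData, so they must go through EvaluateTemplateForChatMessage below)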
if templateType == ChatMessageTemplate { return "", fmt.Errorf("invalid templateType: ChatMessage") } - return ml.evaluateTemplate(templateType, templateName, in) + return ml.templates.EvaluateTemplate(templateType, templateName, in) } func (ml *ModelLoader) EvaluateTemplateForChatMessage(templateName string, messageData ChatMessageTemplateData) (string, error) { - return ml.evaluateTemplate(ChatMessageTemplate, templateName, messageData) -} - -func existsInPath(path string, s string) bool { - _, err := os.Stat(filepath.Join(path, s)) - return err == nil -} - -func (ml *ModelLoader) initializeTemplateMap() { - // This also seems somewhat clunky as we reference the Test / End of valid data value slug, but it works? - for tt := TemplateType(0); tt < IntegrationTestTemplate; tt++ { - ml.templates[tt] = make(map[string]*template.Template) - } -} - -func (ml *ModelLoader) evaluateTemplate(templateType TemplateType, templateName string, in interface{}) (string, error) { - ml.mu.Lock() - defer ml.mu.Unlock() - - m, ok := ml.templates[templateType][templateName] - if !ok { - // return "", fmt.Errorf("template not loaded: %s", templateName) - loadErr := ml.loadTemplateIfExists(templateType, templateName) - if loadErr != nil { - return "", loadErr - } - m = ml.templates[templateType][templateName] // ok is not important since we check m on the next line, and wealready checked - } - if m == nil { - return "", fmt.Errorf("failed loading a template for %s", templateName) - } - - var buf bytes.Buffer - - if err := m.Execute(&buf, in); err != nil { - return "", err - } - return buf.String(), nil -} - -func (ml *ModelLoader) loadTemplateIfExists(templateType TemplateType, templateName string) error { - // Check if the template was already loaded - if _, ok := ml.templates[templateType][templateName]; ok { - return nil - } - - // Check if the model path exists - // skip any error here - we run anyway if a template does not exist - modelTemplateFile := fmt.Sprintf("%s.tmpl", templateName) - - dat := "" - if ml.ExistsInModelPath(modelTemplateFile) { - d, err := os.ReadFile(filepath.Join(ml.ModelPath, modelTemplateFile)) - if err != nil { - return err - } - dat = string(d) - } else { - dat = templateName - } - - // Parse the template - tmpl, err := template.New("prompt").Funcs(sprig.FuncMap()).Parse(dat) - if err != nil { - return err - } - ml.templates[templateType][templateName] = tmpl - - return nil + return ml.templates.EvaluateTemplate(ChatMessageTemplate, templateName, messageData) } diff --git a/pkg/model/loader_test.go b/pkg/model/loader_test.go index 4c3c1a88..e4207b35 100644 --- a/pkg/model/loader_test.go +++ b/pkg/model/loader_test.go @@ -92,10 +92,13 @@ var testMatch map[string]map[string]interface{} = map[string]map[string]interfac var _ = Describe("Templates", func() { Context("chat message", func() { - modelLoader := NewModelLoader("") + var modelLoader *ModelLoader + BeforeEach(func() { + modelLoader = NewModelLoader("") + }) for key := range testMatch { foo := testMatch[key] - It("renders correctly "+key, func() { + It("renders correctly `"+key+"`", func() { templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData)) Expect(err).ToNot(HaveOccurred()) Expect(templated).To(Equal(foo["expected"]), templated) diff --git a/pkg/templates/cache.go b/pkg/templates/cache.go new file mode 100644 index 00000000..9ff55605 --- /dev/null +++ b/pkg/templates/cache.go @@ -0,0 +1,103 @@ +package templates + +import ( + "bytes" + "fmt" + 
"os" + "path/filepath" + "sync" + "text/template" + + "github.com/go-skynet/LocalAI/pkg/utils" + + "github.com/Masterminds/sprig/v3" +) + +// Keep this in sync with config.TemplateConfig. Is there a more idiomatic way to accomplish this in go? +// Technically, order doesn't _really_ matter, but the count must stay in sync, see tests/integration/reflect_test.go +type TemplateType int + +type TemplateCache struct { + mu sync.Mutex + templatesPath string + templates map[TemplateType]map[string]*template.Template +} + +func NewTemplateCache(templatesPath string) *TemplateCache { + tc := &TemplateCache{ + templatesPath: templatesPath, + templates: make(map[TemplateType]map[string]*template.Template), + } + return tc +} + +func (tc *TemplateCache) initializeTemplateMapKey(tt TemplateType) { + if _, ok := tc.templates[tt]; !ok { + tc.templates[tt] = make(map[string]*template.Template) + } +} + +func (tc *TemplateCache) EvaluateTemplate(templateType TemplateType, templateName string, in interface{}) (string, error) { + tc.mu.Lock() + defer tc.mu.Unlock() + + tc.initializeTemplateMapKey(templateType) + m, ok := tc.templates[templateType][templateName] + if !ok { + // return "", fmt.Errorf("template not loaded: %s", templateName) + loadErr := tc.loadTemplateIfExists(templateType, templateName) + if loadErr != nil { + return "", loadErr + } + m = tc.templates[templateType][templateName] // ok is not important since we check m on the next line, and wealready checked + } + if m == nil { + return "", fmt.Errorf("failed loading a template for %s", templateName) + } + + var buf bytes.Buffer + + if err := m.Execute(&buf, in); err != nil { + return "", err + } + return buf.String(), nil +} + +func (tc *TemplateCache) loadTemplateIfExists(templateType TemplateType, templateName string) error { + + // Check if the template was already loaded + if _, ok := tc.templates[templateType][templateName]; ok { + return nil + } + + // Check if the model path exists + // skip any error here - we run anyway if a template does not exist + modelTemplateFile := fmt.Sprintf("%s.tmpl", templateName) + + dat := "" + file := filepath.Join(tc.templatesPath, modelTemplateFile) + + // Security check + if err := utils.VerifyPath(modelTemplateFile, tc.templatesPath); err != nil { + return fmt.Errorf("template file outside path: %s", file) + } + + if utils.ExistsInPath(tc.templatesPath, modelTemplateFile) { + d, err := os.ReadFile(file) + if err != nil { + return err + } + dat = string(d) + } else { + dat = templateName + } + + // Parse the template + tmpl, err := template.New("prompt").Funcs(sprig.FuncMap()).Parse(dat) + if err != nil { + return err + } + tc.templates[templateType][templateName] = tmpl + + return nil +} diff --git a/pkg/templates/cache_test.go b/pkg/templates/cache_test.go new file mode 100644 index 00000000..83af02b2 --- /dev/null +++ b/pkg/templates/cache_test.go @@ -0,0 +1,73 @@ +package templates_test + +import ( + "os" + "path/filepath" + + "github.com/go-skynet/LocalAI/pkg/templates" // Update with your module path + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +var _ = Describe("TemplateCache", func() { + var ( + templateCache *templates.TemplateCache + tempDir string + ) + + BeforeEach(func() { + var err error + tempDir, err = os.MkdirTemp("", "templates") + Expect(err).NotTo(HaveOccurred()) + + // Writing example template files + err = os.WriteFile(filepath.Join(tempDir, "example.tmpl"), []byte("Hello, {{.Name}}!"), 0644) + Expect(err).NotTo(HaveOccurred()) + err = os.WriteFile(filepath.Join(tempDir, "empty.tmpl"), []byte(""), 0644) + Expect(err).NotTo(HaveOccurred()) + + templateCache = templates.NewTemplateCache(tempDir) + }) + + AfterEach(func() { + os.RemoveAll(tempDir) // Clean up + }) + + Describe("EvaluateTemplate", func() { + Context("when template is loaded successfully", func() { + It("should evaluate the template correctly", func() { + result, err := templateCache.EvaluateTemplate(1, "example", map[string]string{"Name": "Gopher"}) + Expect(err).NotTo(HaveOccurred()) + Expect(result).To(Equal("Hello, Gopher!")) + }) + }) + + Context("when template isn't a file", func() { + It("should parse from string", func() { + result, err := templateCache.EvaluateTemplate(1, "{{.Name}}", map[string]string{"Name": "Gopher"}) + Expect(err).ToNot(HaveOccurred()) + Expect(result).To(Equal("Gopher")) + }) + }) + + Context("when template is empty", func() { + It("should return an empty string", func() { + result, err := templateCache.EvaluateTemplate(1, "empty", nil) + Expect(err).NotTo(HaveOccurred()) + Expect(result).To(Equal("")) + }) + }) + }) + + Describe("concurrency", func() { + It("should handle multiple concurrent accesses", func(done Done) { + go func() { + _, _ = templateCache.EvaluateTemplate(1, "example", map[string]string{"Name": "Gopher"}) + }() + go func() { + _, _ = templateCache.EvaluateTemplate(1, "example", map[string]string{"Name": "Gopher"}) + }() + close(done) + }, 0.1) // timeout in seconds + }) +}) diff --git a/pkg/templates/utils_suite_test.go b/pkg/templates/utils_suite_test.go new file mode 100644 index 00000000..011ba8f6 --- /dev/null +++ b/pkg/templates/utils_suite_test.go @@ -0,0 +1,13 @@ +package templates_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +func TestTemplates(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Templates test suite") +} diff --git a/pkg/utils/path.go b/pkg/utils/path.go index f95b0138..9982bc1e 100644 --- a/pkg/utils/path.go +++ b/pkg/utils/path.go @@ -2,10 +2,16 @@ package utils import ( "fmt" + "os" "path/filepath" "strings" ) +func ExistsInPath(path string, s string) bool { + _, err := os.Stat(filepath.Join(path, s)) + return err == nil +} + func inTrustedRoot(path string, trustedRoot string) error { for path != "/" { path = filepath.Dir(path) From b2772509b44f2a19bb5d61a19c261b2ea02dc180 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 19 Apr 2024 18:23:44 +0200 Subject: [PATCH 0330/2895] models(llama3): add llama3 to embedded models (#2074) Signed-off-by: Ettore Di Giacinto --- aio/cpu/text-to-text.yaml | 22 +++-- aio/gpu-8g/text-to-text.yaml | 22 +++-- aio/intel/text-to-text.yaml | 22 +++-- embedded/models/hermes-2-pro-mistral.yaml | 22 +++-- embedded/models/llama3-instruct.yaml | 48 +++++++++++ pkg/model/loader_test.go | 99 ++++++++++++++++++++++- 6 files changed, 203 insertions(+), 32 deletions(-) create mode 100644 embedded/models/llama3-instruct.yaml diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml index 6c4ec9e6..cf18f659 100644 --- a/aio/cpu/text-to-text.yaml +++ b/aio/cpu/text-to-text.yaml @@ -6,14 +6,22 @@ parameters: template: chat_message: | <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} - {{- if .FunctionCall }}{{end}} - {{- if eq .RoleName "tool" }}{{end }} - {{- if .Content}} - {{.Content}} + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + + {{- end }} + {{- if .Content}} + {{.Content }} + {{- end }} + {{- if .FunctionCall}} + {{toJson .FunctionCall}} + {{- end }} + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + {{- end }} - {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }} - {{- if .FunctionCall }}{{end }} - {{- if eq .RoleName "tool" }}{{end }} <|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml index 8d5c84f7..0407bb22 100644 --- a/aio/gpu-8g/text-to-text.yaml +++ b/aio/gpu-8g/text-to-text.yaml @@ -6,14 +6,22 @@ parameters: template: chat_message: | <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} - {{- if .FunctionCall }}{{end}} - {{- if eq .RoleName "tool" }}{{end }} - {{- if .Content}} - {{.Content}} + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + + {{- end }} + {{- if .Content}} + {{.Content }} + {{- end }} + {{- if .FunctionCall}} + {{toJson .FunctionCall}} + {{- end }} + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + {{- end }} - {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }} - {{- if .FunctionCall }}{{end }} - {{- if eq .RoleName "tool" }}{{end }} <|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml index a7cb5b4d..f5f93c14 100644 --- a/aio/intel/text-to-text.yaml +++ b/aio/intel/text-to-text.yaml @@ -7,14 +7,22 @@ parameters: template: chat_message: | <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else 
if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} - {{- if .FunctionCall }}{{end}} - {{- if eq .RoleName "tool" }}{{end }} - {{- if .Content}} - {{.Content}} + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + + {{- end }} + {{- if .Content}} + {{.Content }} + {{- end }} + {{- if .FunctionCall}} + {{toJson .FunctionCall}} + {{- end }} + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + {{- end }} - {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }} - {{- if .FunctionCall }}{{end }} - {{- if eq .RoleName "tool" }}{{end }} <|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml index 7bfa9418..dd18ce6f 100644 --- a/embedded/models/hermes-2-pro-mistral.yaml +++ b/embedded/models/hermes-2-pro-mistral.yaml @@ -6,14 +6,22 @@ parameters: template: chat_message: | <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} - {{- if .FunctionCall }}{{end}} - {{- if eq .RoleName "tool" }}{{end }} - {{- if .Content}} - {{.Content}} + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + + {{- end }} + {{- if .Content}} + {{.Content }} + {{- end }} + {{- if .FunctionCall}} + {{toJson .FunctionCall}} + {{- end }} + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + {{- end }} - {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }} - {{- if .FunctionCall }}{{end }} - {{- if eq .RoleName "tool" }}{{end }} <|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | diff --git a/embedded/models/llama3-instruct.yaml b/embedded/models/llama3-instruct.yaml new file mode 100644 index 00000000..d483d2b2 --- /dev/null +++ b/embedded/models/llama3-instruct.yaml @@ -0,0 +1,48 @@ +name: llama3-8b-instruct +mmap: true +parameters: + model: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf + +template: + chat_message: | + <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|> + + {{ if .FunctionCall -}} + Function call: + {{ else if eq .RoleName "tool" -}} + Function response: + {{ end -}} + {{ if .Content -}} + {{.Content -}} + {{ else if .FunctionCall -}} + {{ toJson .FunctionCall -}} + {{ end -}} + <|eot_id|> + function: | + <|start_header_id|>system<|end_header_id|> + + You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: + + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + + Use the following pydantic model json schema for each tool call you will make: + {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|> + Function call: + chat: | + <|begin_of_text|>{{.Input }} + <|start_header_id|>assistant<|end_header_id|> + completion: | + {{.Input}} +context_size: 8192 +f16: true +stopwords: +- <|im_end|> +- +- "<|eot_id|>" +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "llama3-8b-instruct", + "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] + }' diff --git a/pkg/model/loader_test.go b/pkg/model/loader_test.go index e4207b35..d3956b63 100644 --- a/pkg/model/loader_test.go +++ b/pkg/model/loader_test.go @@ -27,7 +27,84 @@ const chatML = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq {{- end }} <|im_end|>` -var testMatch map[string]map[string]interface{} = map[string]map[string]interface{}{ +const llama3 = `<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|> + +{{ if .FunctionCall -}} +Function call: +{{ else if eq .RoleName "tool" -}} +Function response: +{{ end -}} +{{ if .Content -}} +{{.Content -}} +{{ else if .FunctionCall -}} +{{ toJson .FunctionCall -}} +{{ end -}} +<|eot_id|>` + +var llama3TestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{ + "user": { + "template": llama3, + "expected": "<|start_header_id|>user<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>", + "data": model.ChatMessageTemplateData{ + SystemPrompt: "", + Role: "user", + RoleName: "user", + Content: "A long time ago in a galaxy far, far away...", + FunctionCall: nil, + FunctionName: "", + LastMessage: false, + Function: false, + MessageIndex: 0, + }, + }, + "assistant": { + "template": llama3, + "expected": "<|start_header_id|>assistant<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>", + "data": model.ChatMessageTemplateData{ + SystemPrompt: "", + Role: "assistant", + RoleName: "assistant", + Content: "A long time ago in a galaxy far, far away...", + FunctionCall: nil, + FunctionName: "", + LastMessage: false, + Function: false, + MessageIndex: 0, + }, + }, + "function_call": { + "template": llama3, + "expected": "<|start_header_id|>assistant<|end_header_id|>\n\nFunction call:\n{\"function\":\"test\"}<|eot_id|>", + "data": model.ChatMessageTemplateData{ + SystemPrompt: "", + Role: "assistant", + RoleName: "assistant", + Content: "", + FunctionCall: map[string]string{"function": "test"}, + FunctionName: "", + LastMessage: false, + Function: false, + MessageIndex: 0, + }, + }, + "function_response": { + "template": llama3, + "expected": "<|start_header_id|>tool<|end_header_id|>\n\nFunction response:\nResponse from tool<|eot_id|>", + "data": model.ChatMessageTemplateData{ + SystemPrompt: "", + Role: "tool", + RoleName: "tool", + Content: "Response from tool", + FunctionCall: nil, + FunctionName: "", + LastMessage: false, + Function: false, + MessageIndex: 0, + }, + }, +} + 
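+// chatMLTestMatch mirrors llama3TestMatch above for the ChatML template: the
+// same four cases (user, assistant, function_call, function_response), rendered
+// with <|im_start|>/<|im_end|> markers instead of the llama3 header tags.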
+var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{ "user": { "template": chatML, "expected": "<|im_start|>user\nA long time ago in a galaxy far, far away...\n<|im_end|>", @@ -91,13 +168,27 @@ var testMatch map[string]map[string]interface{} = map[string]map[string]interfac } var _ = Describe("Templates", func() { - Context("chat message", func() { + Context("chat message ChatML", func() { var modelLoader *ModelLoader BeforeEach(func() { modelLoader = NewModelLoader("") }) - for key := range testMatch { - foo := testMatch[key] + for key := range chatMLTestMatch { + foo := chatMLTestMatch[key] + It("renders correctly `"+key+"`", func() { + templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData)) + Expect(err).ToNot(HaveOccurred()) + Expect(templated).To(Equal(foo["expected"]), templated) + }) + } + }) + Context("chat message llama3", func() { + var modelLoader *ModelLoader + BeforeEach(func() { + modelLoader = NewModelLoader("") + }) + for key := range llama3TestMatch { + foo := llama3TestMatch[key] It("renders correctly `"+key+"`", func() { templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData)) Expect(err).ToNot(HaveOccurred()) From 1e3710193065cf79640cbe10bb0c1440313da6b7 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 20 Apr 2024 02:05:16 +0200 Subject: [PATCH 0331/2895] :arrow_up: Update ggerganov/llama.cpp (#2080) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e2bfa594..b9af4612 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=0d56246f4b9764158525d894b96606f6163c53a8 +CPPLLAMA_VERSION?=0e4802b2ecbaab04b4f829fde4a3096ca19c84b5 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From b9e770864356e82b8720be246e169ba2abedbf08 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Fri, 19 Apr 2024 19:31:15 -0500 Subject: [PATCH 0332/2895] feat: enable polling configs for systems with broken fsnotify (docker volumes on windows) (#2081) * feat: enable polling configs for systems with broken fsnotify (docker volumes on windows) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: update logging to make it clear that the config file is being polled Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --------- Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- core/cli/run.go | 16 +++++++++------- core/config/application_config.go | 7 +++++++ core/startup/config_file_watcher.go | 15 +++++++++++++++ docs/content/docs/advanced/advanced-usage.md | 1 + 4 files changed, 32 insertions(+), 7 deletions(-) diff --git a/core/cli/run.go b/core/cli/run.go index d729f946..02d863cd 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -14,13 +14,14 @@ import ( type RunCMD struct { ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"` - ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" 
group:"storage"` - BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` - ImagePath string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"` - AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. piper)" group:"storage"` - UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"` - ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"` - LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"` + ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` + BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` + ImagePath string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"` + AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. 
piper)" group:"storage"` + UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"` + ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"` + LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"` + LocalaiConfigDirPollInterval time.Duration `env:"LOCALAI_CONFIG_DIR_POLL_INTERVAL" help:"Typically the config path picks up changes automatically, but if your system has broken fsnotify events, set this to an interval to poll the LocalAI Config Dir (example: 1m)" group:"storage"` // The alias on this option is there to preserve functionality with the old `--config-file` parameter ModelsConfigFile string `env:"LOCALAI_MODELS_CONFIG_FILE,CONFIG_FILE" aliases:"config-file" help:"YAML file containing a list of model backend configs" group:"storage"` @@ -65,6 +66,7 @@ func (r *RunCMD) Run(ctx *Context) error { config.WithUploadDir(r.UploadPath), config.WithConfigsDir(r.ConfigPath), config.WithDynamicConfigDir(r.LocalaiConfigDir), + config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval), config.WithF16(r.F16), config.WithStringGalleries(r.Galleries), config.WithModelLibraryURL(r.RemoteLibrary), diff --git a/core/config/application_config.go b/core/config/application_config.go index 77817616..d4adee18 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -23,6 +23,7 @@ type ApplicationConfig struct { UploadDir string ConfigsDir string DynamicConfigsDir string + DynamicConfigsDirPollInterval time.Duration CORS bool PreloadJSONModels string PreloadModelsFromPath string @@ -271,6 +272,12 @@ func WithDynamicConfigDir(dynamicConfigsDir string) AppOption { } } +func WithDynamicConfigDirPollInterval(interval time.Duration) AppOption { + return func(o *ApplicationConfig) { + o.DynamicConfigsDirPollInterval = interval + } +} + func WithApiKeys(apiKeys []string) AppOption { return func(o *ApplicationConfig) { o.ApiKeys = apiKeys diff --git a/core/startup/config_file_watcher.go b/core/startup/config_file_watcher.go index 5d213df5..5f6834d4 100644 --- a/core/startup/config_file_watcher.go +++ b/core/startup/config_file_watcher.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "path" + "time" "github.com/fsnotify/fsnotify" "github.com/go-skynet/LocalAI/core/config" @@ -66,6 +67,20 @@ func (c *configFileHandler) Watch() error { log.Fatal().Err(err).Str("configdir", c.configDir).Msg("wnable to create a watcher for configuration directory") } + if c.appConfig.DynamicConfigsDirPollInterval > 0 { + log.Debug().Msg("Poll interval set, falling back to polling for configuration changes") + ticker := time.NewTicker(c.appConfig.DynamicConfigsDirPollInterval) + go func() { + for { + <-ticker.C + for file, handler := range c.handlers { + log.Debug().Str("file", file).Msg("polling config file") + c.callHandler(file, handler) + } + } + }() + } + // Start listening for events. 
go func() { for { diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md index 4bd16030..cbf7dba3 100644 --- a/docs/content/docs/advanced/advanced-usage.md +++ b/docs/content/docs/advanced/advanced-usage.md @@ -402,6 +402,7 @@ In the help text below, BASEPATH is the location that local-ai is being executed | --upload-path | /tmp/localai/upload | Path to store uploads from files api | $LOCALAI_UPLOAD_PATH | | --config-path | /tmp/localai/config | | $LOCALAI_CONFIG_PATH | | --localai-config-dir | BASEPATH/configuration | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | $LOCALAI_CONFIG_DIR | +| --localai-config-dir-poll-interval | | Typically the config path picks up changes automatically, but if your system has broken fsnotify events, set this to a time duration to poll the LocalAI Config Dir (example: 1m) | $LOCALAI_CONFIG_DIR_POLL_INTERVAL | | --models-config-file | STRING | YAML file containing a list of model backend configs | $LOCALAI_MODELS_CONFIG_FILE | #### Models Flags From 1038f7469c72e44e19cabaa0af474cb75d2b6121 Mon Sep 17 00:00:00 2001 From: Dave Date: Sat, 20 Apr 2024 04:42:02 -0400 Subject: [PATCH 0333/2895] fix: action-tmate: use connect-timeout-seconds and limit-access-to-actor (#2083) fix for action-tmate: connect-timeout-seconds and limit-access-to-actor Signed-off-by: Dave Lee --- .github/workflows/test.yml | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 46c4e065..9eb4f084 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -121,8 +121,10 @@ jobs: PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3 - timeout-minutes: 5 + uses: mxschmitt/action-tmate@v3.18 + with: + connect-timeout-seconds: 180 + limit-access-to-actor: true tests-aio-container: runs-on: ubuntu-latest @@ -173,8 +175,10 @@ make run-e2e-aio - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3 - timeout-minutes: 5 + uses: mxschmitt/action-tmate@v3.18 + with: + connect-timeout-seconds: 180 + limit-access-to-actor: true tests-apple: runs-on: macOS-14 @@ -207,5 +211,7 @@ BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3 - timeout-minutes: 5 \ No newline at end of file + uses: mxschmitt/action-tmate@v3.18 + with: + connect-timeout-seconds: 180 + limit-access-to-actor: true \ No newline at end of file From 8d30b39811fa1a00e9b8443a0b9f1db6e5609b5a Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Sat, 20 Apr 2024 03:43:37 -0500 Subject: [PATCH 0334/2895] feat: fiber logs with zerolog and add trace level (#2082) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- core/cli/cli.go | 2 +- core/cli/run.go | 5 ++--- core/config/application_config.go | 19 ++++++------------ core/http/api.go | 33 +++++++++++++++++++++---------- core/http/api_test.go | 1 - core/startup/startup.go | 6 ------ go.mod | 3 ++- go.sum | 4 ++++ main.go | 11 +++++++---- 9 files changed, 45 insertions(+), 39 deletions(-) diff --git a/core/cli/cli.go
b/core/cli/cli.go index 5e757f64..2f2dcd8b 100644 --- a/core/cli/cli.go +++ b/core/cli/cli.go @@ -4,7 +4,7 @@ import "embed" type Context struct { Debug bool `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"` - LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug" help:"Set the level of logs to output [${enum}]"` + LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"` // This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI BackendAssets embed.FS `kong:"-"` diff --git a/core/cli/run.go b/core/cli/run.go index 02d863cd..16e65725 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -8,6 +8,7 @@ import ( "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/http" "github.com/go-skynet/LocalAI/core/startup" + "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) @@ -60,7 +61,7 @@ func (r *RunCMD) Run(ctx *Context) error { config.WithYAMLConfigPreload(r.PreloadModelsConfig), config.WithModelPath(r.ModelsPath), config.WithContextSize(r.ContextSize), - config.WithDebug(*ctx.LogLevel == "debug"), + config.WithDebug(zerolog.GlobalLevel() <= zerolog.DebugLevel), config.WithImageDir(r.ImagePath), config.WithAudioDir(r.AudioPath), config.WithUploadDir(r.UploadPath), @@ -70,7 +71,6 @@ func (r *RunCMD) Run(ctx *Context) error { config.WithF16(r.F16), config.WithStringGalleries(r.Galleries), config.WithModelLibraryURL(r.RemoteLibrary), - config.WithDisableMessage(false), config.WithCors(r.CORS), config.WithCorsAllowOrigins(r.CORSAllowOrigins), config.WithThreads(r.Threads), @@ -131,7 +131,6 @@ func (r *RunCMD) Run(ctx *Context) error { } cl, ml, options, err := startup.Startup(opts...) 
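// For reference, zerolog orders its levels TraceLevel (-1) < DebugLevel (0) <
// InfoLevel (1), so the WithDebug(zerolog.GlobalLevel() <= zerolog.DebugLevel)
// option above stays enabled for both --log-level=debug and --log-level=trace.
// A quick sketch of the comparison:
//
//	zerolog.SetGlobalLevel(zerolog.TraceLevel)
//	fmt.Println(zerolog.GlobalLevel() <= zerolog.DebugLevel) // prints "true"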
- if err != nil { return fmt.Errorf("failed basic startup tasks with error %s", err.Error()) } diff --git a/core/config/application_config.go b/core/config/application_config.go index d4adee18..2d733c1e 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -17,7 +17,7 @@ type ApplicationConfig struct { UploadLimitMB, Threads, ContextSize int DisableWelcomePage bool F16 bool - Debug, DisableMessage bool + Debug bool ImageDir string AudioDir string UploadDir string @@ -57,12 +57,11 @@ type AppOption func(*ApplicationConfig) func NewApplicationConfig(o ...AppOption) *ApplicationConfig { opt := &ApplicationConfig{ - Context: context.Background(), - UploadLimitMB: 15, - Threads: 1, - ContextSize: 512, - Debug: true, - DisableMessage: true, + Context: context.Background(), + UploadLimitMB: 15, + Threads: 1, + ContextSize: 512, + Debug: true, } for _, oo := range o { oo(opt) @@ -236,12 +235,6 @@ func WithDebug(debug bool) AppOption { } } -func WithDisableMessage(disableMessage bool) AppOption { - return func(o *ApplicationConfig) { - o.DisableMessage = disableMessage - } -} - func WithAudioDir(audioDir string) AppOption { return func(o *ApplicationConfig) { o.AudioDir = audioDir diff --git a/core/http/api.go b/core/http/api.go index af38512a..fe8f711c 100644 --- a/core/http/api.go +++ b/core/http/api.go @@ -7,7 +7,6 @@ import ( "strings" "github.com/go-skynet/LocalAI/pkg/utils" - "github.com/gofiber/swagger" // swagger handler "github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs" "github.com/go-skynet/LocalAI/core/http/endpoints/localai" @@ -19,10 +18,13 @@ import ( "github.com/go-skynet/LocalAI/internal" "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/contrib/fiberzerolog" "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/cors" - "github.com/gofiber/fiber/v2/middleware/logger" "github.com/gofiber/fiber/v2/middleware/recover" + "github.com/gofiber/swagger" // swagger handler + + "github.com/rs/zerolog/log" ) func readAuthHeader(c *fiber.Ctx) string { @@ -59,9 +61,11 @@ func readAuthHeader(c *fiber.Ctx) string { func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) { // Return errors as JSON responses app := fiber.New(fiber.Config{ - Views: renderEngine(), - BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB - DisableStartupMessage: appConfig.DisableMessage, + Views: renderEngine(), + BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB + // We disable the Fiber startup message as it does not conform to structured logging. + // We register a startup log line with connection information in the OnListen hook to keep things user friendly though + DisableStartupMessage: true, // Override default error handler ErrorHandler: func(ctx *fiber.Ctx, err error) error { // Status code defaults to 500 @@ -82,11 +86,20 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi }, }) - if appConfig.Debug { - app.Use(logger.New(logger.Config{ - Format: "[${ip}]:${port} ${status} - ${method} ${path}\n", - })) - } + app.Hooks().OnListen(func(listenData fiber.ListenData) error { + scheme := "http" + if listenData.TLS { + scheme = "https" + } + log.Info().Str("endpoint", scheme+"://"+listenData.Host+":"+listenData.Port).Msg("LocalAI API is listening! 
Please connect to the endpoint for API documentation.") + return nil + }) + + // Have Fiber use zerolog like the rest of the application rather than its built-in logger + logger := log.Logger + app.Use(fiberzerolog.New(fiberzerolog.Config{ + Logger: &logger, + })) // Default middleware config diff --git a/core/http/api_test.go b/core/http/api_test.go index 1553ed21..35e0a8bf 100644 --- a/core/http/api_test.go +++ b/core/http/api_test.go @@ -211,7 +211,6 @@ var _ = Describe("API test", func() { commonOpts := []config.AppOption{ config.WithDebug(true), - config.WithDisableMessage(true), } Context("API with ephemeral models", func() { diff --git a/core/startup/startup.go b/core/startup/startup.go index af92f0e1..97882a22 100644 --- a/core/startup/startup.go +++ b/core/startup/startup.go @@ -10,18 +10,12 @@ import ( "github.com/go-skynet/LocalAI/pkg/assets" "github.com/go-skynet/LocalAI/pkg/model" pkgStartup "github.com/go-skynet/LocalAI/pkg/startup" - "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) { options := config.NewApplicationConfig(opts...) - zerolog.SetGlobalLevel(zerolog.InfoLevel) - if options.Debug { - zerolog.SetGlobalLevel(zerolog.DebugLevel) - } - log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath) log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion()) diff --git a/go.mod b/go.mod index 99af8ce7..0bf9aa02 100644 --- a/go.mod +++ b/go.mod @@ -29,7 +29,7 @@ require ( github.com/otiai10/openaigo v1.6.0 github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 github.com/prometheus/client_golang v1.17.0 - github.com/rs/zerolog v1.31.0 + github.com/rs/zerolog v1.32.0 github.com/russross/blackfriday v1.6.0 github.com/sashabaranov/go-openai v1.20.4 github.com/schollz/progressbar/v3 v3.13.1 @@ -145,6 +145,7 @@ require ( github.com/go-audio/riff v1.0.0 // indirect github.com/go-logr/logr v1.2.4 // indirect github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect + github.com/gofiber/contrib/fiberzerolog v1.0.0 github.com/google/go-cmp v0.6.0 // indirect github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect github.com/hashicorp/errwrap v1.0.0 // indirect diff --git a/go.sum b/go.sum index a421e79c..55fdaf06 100644 --- a/go.sum +++ b/go.sum @@ -100,6 +100,8 @@ github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg78 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/gofiber/contrib/fiberzerolog v1.0.0 h1:IB8q+NO2zPNS4VHKde1x5DqtMJ5vGrvDCydnAjlFw3E= +github.com/gofiber/contrib/fiberzerolog v1.0.0/go.mod h1:SOi+Wo7RQlO/HV0jsYTu6uFQy+8ZPTzCZW4fDEKD3l8= github.com/gofiber/fiber/v2 v2.52.4 h1:P+T+4iK7VaqUsq2PALYEfBBo6bJZ4q3FP8cZ84EggTM= github.com/gofiber/fiber/v2 v2.52.4/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ= github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc= @@ -281,6 +283,8 @@ github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUz github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/rs/zerolog v1.31.0
h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A= github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= +github.com/rs/zerolog v1.32.0 h1:keLypqrlIjaFsbmJOBdB/qvyF8KEtCWHwobLp5l/mQ0= +github.com/rs/zerolog v1.32.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww= github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY= github.com/sashabaranov/go-openai v1.20.4 h1:095xQ/fAtRa0+Rj21sezVJABgKfGPNbyx/sAN/hJUmg= diff --git a/main.go b/main.go index 8b5696d1..0b40175e 100644 --- a/main.go +++ b/main.go @@ -91,17 +91,20 @@ Version: ${version} switch *cli.CLI.LogLevel { case "error": - log.Info().Msg("Setting logging to error") zerolog.SetGlobalLevel(zerolog.ErrorLevel) + log.Info().Msg("Setting logging to error") case "warn": - log.Info().Msg("Setting logging to warn") zerolog.SetGlobalLevel(zerolog.WarnLevel) + log.Info().Msg("Setting logging to warn") case "info": - log.Info().Msg("Setting logging to info") zerolog.SetGlobalLevel(zerolog.InfoLevel) + log.Info().Msg("Setting logging to info") case "debug": - log.Info().Msg("Setting logging to debug") zerolog.SetGlobalLevel(zerolog.DebugLevel) + log.Debug().Msg("Setting logging to debug") + case "trace": + zerolog.SetGlobalLevel(zerolog.TraceLevel) + log.Trace().Msg("Setting logging to trace") } // Populate the application with the embedded backend assets From b319ed58b026f91f48599c62c85eec5fbbc8764b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 20 Apr 2024 15:22:54 +0200 Subject: [PATCH 0335/2895] models(gallery): add gallery (#2078) Signed-off-by: Ettore Di Giacinto --- core/cli/run.go | 2 +- gallery/bert-embeddings.yaml | 15 ++ gallery/index.yaml | 503 +++++++++++++++++++++++++++++++++++ gallery/stablediffusion.yaml | 54 ++++ gallery/tinydream.yaml | 42 +++ gallery/virtual.yaml | 6 + gallery/whisper-base.yaml | 18 ++ main.go | 1 + 8 files changed, 640 insertions(+), 1 deletion(-) create mode 100644 gallery/bert-embeddings.yaml create mode 100644 gallery/index.yaml create mode 100644 gallery/stablediffusion.yaml create mode 100644 gallery/tinydream.yaml create mode 100644 gallery/virtual.yaml create mode 100644 gallery/whisper-base.yaml diff --git a/core/cli/run.go b/core/cli/run.go index 16e65725..42185a28 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -26,7 +26,7 @@ type RunCMD struct { // The alias on this option is there to preserve functionality with the old `--config-file` parameter ModelsConfigFile string `env:"LOCALAI_MODELS_CONFIG_FILE,CONFIG_FILE" aliases:"config-file" help:"YAML file containing a list of model backend configs" group:"storage"` - Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models"` + Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"` AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"` RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"` PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"` diff --git a/gallery/bert-embeddings.yaml b/gallery/bert-embeddings.yaml new file mode 100644 index 00000000..0798bf54 --- /dev/null +++ b/gallery/bert-embeddings.yaml @@ -0,0 +1,15 @@ +name: "bert-embeddings" +license: "Apache 2.0" 
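+# A sketch of installing this entry at runtime through the model gallery API,
+# assuming the default "localai" gallery registered later in this patch (the
+# payload shape below follows the /models/apply endpoint and is illustrative):
+#   curl http://localhost:8080/models/apply -H "Content-Type: application/json" \
+#     -d '{"id": "localai@bert-embeddings"}'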
+urls: +- https://huggingface.co/skeskinen/ggml +description: | + Bert model that can be used for embeddings +config_file: | + parameters: + model: bert-MiniLM-L6-v2q4_0.bin + backend: bert-embeddings + embeddings: true +files: +- filename: "bert-MiniLM-L6-v2q4_0.bin" + sha256: "a5a174d8772c8a569faf9f3136c441f2c3855b5bf35ed32274294219533feaad" + uri: "https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin" \ No newline at end of file diff --git a/gallery/index.yaml b/gallery/index.yaml new file mode 100644 index 00000000..6b882768 --- /dev/null +++ b/gallery/index.yaml @@ -0,0 +1,503 @@ +## Whisper +- url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" + name: "whisper-1" + license: other +## Bert embeddings +- url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master" + name: "bert-embeddings" + license: other +- url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master" + name: "text-embedding-ada-002" + license: other +## Stable Diffusion +- url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master + name: stablediffusion + license: other +## Tiny Dream +- url: github:mudler/LocalAI/gallery/tinydream.yaml@master + name: tinydream + license: other +## Piper TTS +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-kathleen-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-kathleen-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-kathleen-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-ca-upc_ona-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-ca-upc_ona-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ca-upc_ona-x-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-ca-upc_pau-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-ca-upc_pau-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ca-upc_pau-x-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-da-nst_talesyntese-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-da-nst_talesyntese-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-da-nst_talesyntese-medium.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-de-eva_k-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-de-eva_k-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-eva_k-x-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-de-karlsson-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-de-karlsson-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-karlsson-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-de-kerstin-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-de-kerstin-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-kerstin-low.tar.gz +- url: 
github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-de-pavoque-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-de-pavoque-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-pavoque-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-de-ramona-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-de-ramona-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-ramona-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-de-thorsten-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-de-thorsten-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-thorsten-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-el-gr-rapunzelina-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-el-gr-rapunzelina-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-el-gr-rapunzelina-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-gb-alan-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-gb-alan-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-gb-alan-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-gb-southern_english_female-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-gb-southern_english_female-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-gb-southern_english_female-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-amy-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-amy-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-danny-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-danny-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-kathleen-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-kathleen-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-kathleen-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-lessac-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-lessac-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-lessac-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-lessac-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-lessac-medium.tar.gz + uri: 
https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-lessac-medium.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-libritts-high + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-libritts-high.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-libritts-high.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-ryan-high + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-ryan-high.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-high.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-ryan-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-ryan-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-ryan-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-ryan-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us_lessac + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us_lessac.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us_lessac.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-es-carlfm-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-es-carlfm-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-carlfm-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-es-mls_10246-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-es-mls_10246-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-mls_10246-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-es-mls_9972-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-es-mls_9972-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-mls_9972-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-fi-harri-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-fi-harri-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fi-harri-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-fr-gilles-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-fr-gilles-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-gilles-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-fr-mls_1840-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-fr-mls_1840-low.tar.gz + 
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-mls_1840-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-fr-siwis-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-fr-siwis-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-siwis-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-fr-siwis-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-fr-siwis-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-siwis-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-is-bui-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-is-bui-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-bui-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-is-salka-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-is-salka-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-salka-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-is-steinn-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-is-steinn-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-steinn-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-is-ugla-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-is-ugla-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-ugla-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-it-riccardo_fasol-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-it-riccardo_fasol-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-it-riccardo_fasol-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-kk-iseke-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-kk-iseke-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-iseke-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-kk-issai-high + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-kk-issai-high.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-issai-high.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-kk-raya-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-kk-raya-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-raya-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-ne-google-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: 
voice-ne-google-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ne-google-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-ne-google-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-ne-google-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ne-google-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-nl-mls_5809-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-nl-mls_5809-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-mls_5809-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-nl-mls_7432-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-nl-mls_7432-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-mls_7432-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-nl-nathalie-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-nl-nathalie-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-nathalie-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-nl-rdh-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-nl-rdh-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-rdh-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-nl-rdh-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-nl-rdh-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-rdh-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-no-talesyntese-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-no-talesyntese-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-no-talesyntese-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-pl-mls_6892-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-pl-mls_6892-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-pl-mls_6892-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-pt-br-edresson-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-pt-br-edresson-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-pt-br-edresson-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-ru-irinia-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-ru-irinia-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ru-irinia-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-sv-se-nst-medium + license: other + urls: + - 
https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-sv-se-nst-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-sv-se-nst-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-uk-lada-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-uk-lada-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-uk-lada-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-vi-25hours-single-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-vi-25hours-single-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-vi-25hours-single-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-vi-vivos-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-vi-vivos-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-vi-vivos-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-zh-cn-huayan-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-zh-cn-huayan-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh-cn-huayan-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-zh_CN-huayan-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-zh_CN-huayan-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh_CN-huayan-medium.tar.gz \ No newline at end of file diff --git a/gallery/stablediffusion.yaml b/gallery/stablediffusion.yaml new file mode 100644 index 00000000..c8a0eb8b --- /dev/null +++ b/gallery/stablediffusion.yaml @@ -0,0 +1,54 @@ +name: "stablediffusion-cpp" +license: "BSD-3" +urls: +- https://github.com/EdVince/Stable-Diffusion-NCNN +- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE + +description: | + Stable Diffusion in NCNN with c++, supported txt2img and img2img +config_file: | + name: stablediffusion-cpp + backend: stablediffusion + parameters: + model: stablediffusion_assets + +files: +- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param" + sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param" +- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param" + sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param" +- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param" + sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param" +- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin" + sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa" + uri: 
"https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin" +- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin" + sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin" +- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin" + sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin" +- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param" + sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param" +- filename: "stablediffusion_assets/log_sigmas.bin" + sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin" +- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param" + sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param" +- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param" + sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param" +- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param" + sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param" +- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin" + sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin" +- filename: "stablediffusion_assets/vocab.txt" + sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt" \ No newline at end of file diff --git a/gallery/tinydream.yaml b/gallery/tinydream.yaml new file mode 100644 index 00000000..415762de --- /dev/null +++ b/gallery/tinydream.yaml @@ -0,0 +1,42 @@ +name: "tinydream" +license: "BSD-3" +urls: + - https://github.com/symisc/tiny-dream + - https://github.com/symisc/tiny-dream/blob/main/LICENSE + +description: | + An embedded, Header Only, Stable Diffusion C++ implementation +config_file: | + name: tinydream + backend: tinydream + parameters: + model: tinydream_assets + +files: + - filename: "tinydream_assets/AutoencoderKL-fp16.bin" + sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd" + uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/AutoencoderKL-fp16.bin" + - filename: "tinydream_assets/AutoencoderKL-fp16.param" + sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba" + uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/AutoencoderKL-fp16.param" + - filename: "tinydream_assets/FrozenCLIPEmbedder-fp16.bin" + sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6" + uri: 
"https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/FrozenCLIPEmbedder-fp16.bin" + - filename: "tinydream_assets/FrozenCLIPEmbedder-fp16.param" + sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9" + uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/FrozenCLIPEmbedder-fp16.param" + - filename: "tinydream_assets/RealESRGAN_x4plus_anime.bin" + sha256: "fe01c269cfd10cdef8e018ab66ebe750cf79c7af4d1f9c16c737e1295229bacc" + uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/RealESRGAN_x4plus_anime.bin" + - filename: "tinydream_assets/RealESRGAN_x4plus_anime.param" + sha256: "2b8fb6e0ae4d2d85704ca08c119a2f5ea40add4f2ecd512eb7f4cd44b6127ed4" + uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/RealESRGAN_x4plus_anime.param" + - filename: "tinydream_assets/UNetModel-fp16.bin" + sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3" + uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.bin" + - filename: "tinydream_assets/UNetModel-fp16.param" + sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d" + uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.param" + - filename: "tinydream_assets/vocab.txt" + sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" + uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/vocab.txt" \ No newline at end of file diff --git a/gallery/virtual.yaml b/gallery/virtual.yaml new file mode 100644 index 00000000..054c3257 --- /dev/null +++ b/gallery/virtual.yaml @@ -0,0 +1,6 @@ +name: "virtual" + +description: | + A Base model definition + +license: "N/A" \ No newline at end of file diff --git a/gallery/whisper-base.yaml b/gallery/whisper-base.yaml new file mode 100644 index 00000000..574dbb13 --- /dev/null +++ b/gallery/whisper-base.yaml @@ -0,0 +1,18 @@ +name: "whisper-base" +license: "MIT" +urls: +- https://github.com/ggerganov/whisper.cpp +- https://huggingface.co/ggerganov/whisper.cpp + +description: | + Port of OpenAI's Whisper model in C/C++ + +config_file: | + backend: whisper + parameters: + model: ggml-whisper-base.bin + +files: +- filename: "ggml-whisper-base.bin" + sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" + uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" \ No newline at end of file diff --git a/main.go b/main.go index 0b40175e..9976906b 100644 --- a/main.go +++ b/main.go @@ -72,6 +72,7 @@ Version: ${version} kong.Vars{ "basepath": kong.ExpandPath("."), "remoteLibraryURL": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml", + "galleries": `[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml"}]`, "version": internal.PrintableVersion(), }, ) From 03adc1f60d97ae7cd5d3b1e58c5511e36c5f4eba Mon Sep 17 00:00:00 2001 From: Taikono-Himazin Date: Sat, 20 Apr 2024 23:37:02 +0900 Subject: [PATCH 0336/2895] Add tensor_parallel_size setting to vllm setting items (#2085) Signed-off-by: Taikono-Himazin --- backend/backend.proto | 1 + backend/python/vllm/backend_vllm.py | 2 ++ core/backend/options.go | 1 + core/config/backend_config.go | 1 + 4 files changed, 5 insertions(+) diff --git a/backend/backend.proto b/backend/backend.proto index 62e1a1a6..ec01e4a7 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -177,6 +177,7 @@ message ModelOptions { bool EnforceEager = 52; int32 SwapSpace = 53; int32 MaxModelLen = 54; + 
int32 TensorParallelSize = 55; string MMProj = 41; diff --git a/backend/python/vllm/backend_vllm.py b/backend/python/vllm/backend_vllm.py index ff0f0b26..2d8b55db 100644 --- a/backend/python/vllm/backend_vllm.py +++ b/backend/python/vllm/backend_vllm.py @@ -95,6 +95,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): engine_args.trust_remote_code = request.TrustRemoteCode if request.EnforceEager: engine_args.enforce_eager = request.EnforceEager + if request.TensorParallelSize: + engine_args.tensor_parallel_size = request.TensorParallelSize if request.SwapSpace != 0: engine_args.swap_space = request.SwapSpace if request.MaxModelLen != 0: diff --git a/core/backend/options.go b/core/backend/options.go index 5b303b05..60cb01ff 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -74,6 +74,7 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { EnforceEager: c.EnforceEager, SwapSpace: int32(c.SwapSpace), MaxModelLen: int32(c.MaxModelLen), + TensorParallelSize: int32(c.TensorParallelSize), MMProj: c.MMProj, YarnExtFactor: c.YarnExtFactor, YarnAttnFactor: c.YarnAttnFactor, diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 1161cf9f..a439ee63 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -140,6 +140,7 @@ type LLMConfig struct { EnforceEager bool `yaml:"enforce_eager"` // vLLM SwapSpace int `yaml:"swap_space"` // vLLM MaxModelLen int `yaml:"max_model_len"` // vLLM + TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM MMProj string `yaml:"mmproj"` RopeScaling string `yaml:"rope_scaling"` From afa1bca1e367c2a52fd584d95d5a98904cadb353 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 20 Apr 2024 20:20:10 +0200 Subject: [PATCH 0337/2895] fix(llama.cpp): set -1 as default for max tokens (#2087) Signed-off-by: Ettore Di Giacinto --- core/config/backend_config.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/config/backend_config.go b/core/config/backend_config.go index a439ee63..6ca24afa 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -205,15 +205,15 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { defaultTopP := 0.95 defaultTopK := 40 defaultTemp := 0.9 - defaultMaxTokens := 2048 defaultMirostat := 2 defaultMirostatTAU := 5.0 defaultMirostatETA := 0.1 defaultTypicalP := 1.0 defaultTFZ := 1.0 + defaultInfinity := -1 // Try to offload all GPU layers (if GPU is found) - defaultNGPULayers := 99999999 + defaultHigh := 99999999 trueV := true falseV := false @@ -254,7 +254,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { } if cfg.Maxtokens == nil { - cfg.Maxtokens = &defaultMaxTokens + cfg.Maxtokens = &defaultInfinity } if cfg.Mirostat == nil { @@ -269,7 +269,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { cfg.MirostatTAU = &defaultMirostatTAU } if cfg.NGPULayers == nil { - cfg.NGPULayers = &defaultNGPULayers + cfg.NGPULayers = &defaultHigh } if cfg.LowVRAM == nil { From 284ad026b1ce3d2751a51e48e5eea8ea6458e191 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 21 Apr 2024 01:19:57 +0200 Subject: [PATCH 0338/2895] refactor(routes): split routes registration (#2077) Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- core/http/api.go | 135 ++----------------------- core/http/endpoints/localai/welcome.go | 28 +++++ core/http/render.go | 37 ------- core/http/routes/elevenlabs.go | 19 ++++ core/http/routes/localai.go | 64 ++++++++++++ 
core/http/routes/openai.go | 86 ++++++++++++++++ core/http/routes/welcome.go | 23 +++++ 8 files changed, 227 insertions(+), 167 deletions(-) create mode 100644 core/http/endpoints/localai/welcome.go create mode 100644 core/http/routes/elevenlabs.go create mode 100644 core/http/routes/localai.go create mode 100644 core/http/routes/openai.go create mode 100644 core/http/routes/welcome.go diff --git a/Makefile b/Makefile index b9af4612..ea81b535 100644 --- a/Makefile +++ b/Makefile @@ -714,4 +714,4 @@ docker-image-intel-xpu: .PHONY: swagger swagger: - swag init -g core/http/api.go --output swagger + swag init -g core/http/app.go --output swagger diff --git a/core/http/api.go b/core/http/api.go index fe8f711c..1061627f 100644 --- a/core/http/api.go +++ b/core/http/api.go @@ -8,22 +8,21 @@ import ( "github.com/go-skynet/LocalAI/pkg/utils" - "github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs" "github.com/go-skynet/LocalAI/core/http/endpoints/localai" "github.com/go-skynet/LocalAI/core/http/endpoints/openai" + "github.com/go-skynet/LocalAI/core/http/routes" "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/core/services" - "github.com/go-skynet/LocalAI/internal" "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/contrib/fiberzerolog" "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/cors" "github.com/gofiber/fiber/v2/middleware/recover" - "github.com/gofiber/swagger" // swagger handler + // swagger handler "github.com/rs/zerolog/log" ) @@ -175,16 +174,6 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi app.Use(c) } - // LocalAI API endpoints - galleryService := services.NewGalleryService(appConfig.ModelPath) - galleryService.Start(appConfig.Context, cl) - - app.Get("/version", auth, func(c *fiber.Ctx) error { - return c.JSON(struct { - Version string `json:"version"` - }{Version: internal.PrintableVersion()}) - }) - // Make sure directories exists os.MkdirAll(appConfig.ImageDir, 0755) os.MkdirAll(appConfig.AudioDir, 0755) @@ -197,122 +186,10 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles) - app.Get("/swagger/*", swagger.HandlerDefault) // default - - welcomeRoute( - app, - cl, - ml, - appConfig, - auth, - ) - - modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) - app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint()) - app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint()) - app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint()) - app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint()) - app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint()) - app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint()) - app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint()) - - app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig)) - - // Elevenlabs - app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig)) - - // Stores - sl := model.NewModelLoader("") - app.Post("/stores/set", auth, 
localai.StoresSetEndpoint(sl, appConfig)) - app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig)) - app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig)) - app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig)) - - // openAI compatible API endpoint - - // chat - app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig)) - app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig)) - - // edit - app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) - app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) - - // assistant - app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig)) - app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig)) - app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig)) - app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig)) - app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig)) - app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) - app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) - app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) - app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig)) - - // files - app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) - app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) - app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig)) - app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig)) - app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig)) - app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig)) - app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig)) - app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig)) - app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig)) - app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig)) - - // completion - app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) - app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) - app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) - - // embeddings 
- app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) - app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) - app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) - - // audio - app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig)) - app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig)) - - // images - app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig)) - - if appConfig.ImageDir != "" { - app.Static("/generated-images", appConfig.ImageDir) - } - - if appConfig.AudioDir != "" { - app.Static("/generated-audio", appConfig.AudioDir) - } - - ok := func(c *fiber.Ctx) error { - return c.SendStatus(200) - } - - // Kubernetes health checks - app.Get("/healthz", ok) - app.Get("/readyz", ok) - - // Experimental Backend Statistics Module - backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now - app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor)) - app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor)) - - // models - app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml)) - app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml)) - - app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint()) + routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig, auth) + routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, auth) + routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth) + routes.RegisterPagesRoutes(app, cl, ml, appConfig, auth) // Define a custom 404 handler // Note: keep this at the bottom! diff --git a/core/http/endpoints/localai/welcome.go b/core/http/endpoints/localai/welcome.go new file mode 100644 index 00000000..fd3e6230 --- /dev/null +++ b/core/http/endpoints/localai/welcome.go @@ -0,0 +1,28 @@ +package localai + +import ( + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/internal" + "github.com/gofiber/fiber/v2" +) + +func WelcomeEndpoint(appConfig *config.ApplicationConfig, + models []string, backendConfigs []config.BackendConfig) func(*fiber.Ctx) error { + return func(c *fiber.Ctx) error { + summary := fiber.Map{ + "Title": "LocalAI API - " + internal.PrintableVersion(), + "Version": internal.PrintableVersion(), + "Models": models, + "ModelsConfig": backendConfigs, + "ApplicationConfig": appConfig, + } + + if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 { + // The client expects a JSON response + return c.Status(fiber.StatusOK).JSON(summary) + } else { + // Render index + return c.Render("views/index", summary) + } + } +} diff --git a/core/http/render.go b/core/http/render.go index c5045868..8f1b36c6 100644 --- a/core/http/render.go +++ b/core/http/render.go @@ -7,10 +7,7 @@ import ( "net/http" "github.com/Masterminds/sprig/v3" - "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/internal" - "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" fiberhtml "github.com/gofiber/template/html/v2" "github.com/russross/blackfriday" @@ -33,40 +30,6 @@ func notFoundHandler(c *fiber.Ctx) error { return nil } -func welcomeRoute( - app *fiber.App, - cl *config.BackendConfigLoader, - ml *model.ModelLoader, - appConfig *config.ApplicationConfig, - auth func(*fiber.Ctx) error, -) { - if appConfig.DisableWelcomePage { - return - } - - models, _ := ml.ListModels() 
- backendConfigs := cl.GetAllBackendConfigs() - - app.Get("/", auth, func(c *fiber.Ctx) error { - summary := fiber.Map{ - "Title": "LocalAI API - " + internal.PrintableVersion(), - "Version": internal.PrintableVersion(), - "Models": models, - "ModelsConfig": backendConfigs, - "ApplicationConfig": appConfig, - } - - if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 { - // The client expects a JSON response - return c.Status(fiber.StatusOK).JSON(summary) - } else { - // Render index - return c.Render("views/index", summary) - } - }) - -} - func renderEngine() *fiberhtml.Engine { engine := fiberhtml.NewFileSystem(http.FS(viewsfs), ".html") engine.AddFuncMap(sprig.FuncMap()) diff --git a/core/http/routes/elevenlabs.go b/core/http/routes/elevenlabs.go new file mode 100644 index 00000000..e24a19a8 --- /dev/null +++ b/core/http/routes/elevenlabs.go @@ -0,0 +1,19 @@ +package routes + +import ( + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" +) + +func RegisterElevenLabsRoutes(app *fiber.App, + cl *config.BackendConfigLoader, + ml *model.ModelLoader, + appConfig *config.ApplicationConfig, + auth func(*fiber.Ctx) error) { + + // Elevenlabs + app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig)) + +} diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go new file mode 100644 index 00000000..2651a53e --- /dev/null +++ b/core/http/routes/localai.go @@ -0,0 +1,64 @@ +package routes + +import ( + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/http/endpoints/localai" + "github.com/go-skynet/LocalAI/core/services" + "github.com/go-skynet/LocalAI/internal" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" + "github.com/gofiber/swagger" +) + +func RegisterLocalAIRoutes(app *fiber.App, + cl *config.BackendConfigLoader, + ml *model.ModelLoader, + appConfig *config.ApplicationConfig, + auth func(*fiber.Ctx) error) { + + app.Get("/swagger/*", swagger.HandlerDefault) // default + + // LocalAI API endpoints + galleryService := services.NewGalleryService(appConfig.ModelPath) + galleryService.Start(appConfig.Context, cl) + + modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) + app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint()) + app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint()) + app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint()) + app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint()) + app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint()) + app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint()) + app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint()) + + app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig)) + + // Stores + sl := model.NewModelLoader("") + app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig)) + app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig)) + app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig)) + app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig)) + + // Kubernetes 
health checks + ok := func(c *fiber.Ctx) error { + return c.SendStatus(200) + } + + app.Get("/healthz", ok) + app.Get("/readyz", ok) + + app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint()) + + // Experimental Backend Statistics Module + backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now + app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor)) + app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor)) + + app.Get("/version", auth, func(c *fiber.Ctx) error { + return c.JSON(struct { + Version string `json:"version"` + }{Version: internal.PrintableVersion()}) + }) + +} diff --git a/core/http/routes/openai.go b/core/http/routes/openai.go new file mode 100644 index 00000000..c51ccdcb --- /dev/null +++ b/core/http/routes/openai.go @@ -0,0 +1,86 @@ +package routes + +import ( + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/http/endpoints/localai" + "github.com/go-skynet/LocalAI/core/http/endpoints/openai" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" +) + +func RegisterOpenAIRoutes(app *fiber.App, + cl *config.BackendConfigLoader, + ml *model.ModelLoader, + appConfig *config.ApplicationConfig, + auth func(*fiber.Ctx) error) { + // openAI compatible API endpoint + + // chat + app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig)) + app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig)) + + // edit + app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) + app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) + + // assistant + app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig)) + app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig)) + app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig)) + app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig)) + app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig)) + app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig)) + app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig)) + app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) + app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) + app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) + app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) + app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig)) + + // files + app.Post("/v1/files", auth, 
openai.UploadFilesEndpoint(cl, appConfig)) + app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) + app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig)) + app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig)) + app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig)) + app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig)) + app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig)) + app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig)) + app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig)) + app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig)) + + // completion + app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) + app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) + app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) + + // embeddings + app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) + app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) + app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) + + // audio + app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig)) + app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig)) + + // images + app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig)) + + if appConfig.ImageDir != "" { + app.Static("/generated-images", appConfig.ImageDir) + } + + if appConfig.AudioDir != "" { + app.Static("/generated-audio", appConfig.AudioDir) + } + + // models + app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml)) + app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml)) +} diff --git a/core/http/routes/welcome.go b/core/http/routes/welcome.go new file mode 100644 index 00000000..29b9e586 --- /dev/null +++ b/core/http/routes/welcome.go @@ -0,0 +1,23 @@ +package routes + +import ( + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/http/endpoints/localai" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" +) + +func RegisterPagesRoutes(app *fiber.App, + cl *config.BackendConfigLoader, + ml *model.ModelLoader, + appConfig *config.ApplicationConfig, + auth func(*fiber.Ctx) error) { + + models, _ := ml.ListModels() + backendConfigs := cl.GetAllBackendConfigs() + + if !appConfig.DisableWelcomePage { + app.Get("/", auth, localai.WelcomeEndpoint(appConfig, models, backendConfigs)) + } + +} From 180cd4ccda0753ef1afb2eb07857ec0534ea3366 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 21 Apr 2024 16:34:00 +0200 Subject: [PATCH 0339/2895] fix(llama.cpp-ggml): fixup `max_tokens` for old backend (#2094) fix(llama.cpp-ggml): set 0 as default for `max_tokens` Signed-off-by: Ettore Di Giacinto --- core/config/backend_config.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 6ca24afa..dfc216dc 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -210,7 +210,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { defaultMirostatETA := 0.1 defaultTypicalP := 1.0 defaultTFZ := 1.0 - defaultInfinity := -1 + defaultZero := 0 // Try to offload all GPU layers (if GPU is found) defaultHigh := 99999999 @@ -254,7 
+254,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { } if cfg.Maxtokens == nil { - cfg.Maxtokens = &defaultInfinity + cfg.Maxtokens = &defaultZero } if cfg.Mirostat == nil { From 39814cab32a19fa4a6b88935d4587c6c6bbebe16 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 21 Apr 2024 16:46:13 +0200 Subject: [PATCH 0340/2895] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 4c2f68b2..e28e3cb0 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,7 @@ [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) +- llama3: https://github.com/mudler/LocalAI/discussions/2076 - Parler-TTS: https://github.com/mudler/LocalAI/pull/2027 - Landing page: https://github.com/mudler/LocalAI/pull/1922 - Openvino support: https://github.com/mudler/LocalAI/pull/1892 From 66b002458db4ec93133d066326a63585ba236412 Mon Sep 17 00:00:00 2001 From: fakezeta Date: Sun, 21 Apr 2024 18:20:25 +0200 Subject: [PATCH 0341/2895] Transformer Backend: Implementing use_tokenizer_template and stop_prompts options (#2090) * fix regression #1971 fixes regression #1971 introduced by intel_extension_for_transformers==1.4 * UseTokenizerTemplate and StopPrompt Implementation of use_tokenizer_template and stopwords options --- .../transformers/transformers_server.py | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index c7f1cd75..1b38a956 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -148,7 +148,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): else: device_map="CPU" self.model = OVModelForCausalLM.from_pretrained(model_name, - compile=True, + compile=True, + ov_config={"PERFORMANCE_HINT": "LATENCY"}, device=device_map) self.OV = True else: @@ -212,12 +213,25 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): set_seed(request.Seed) if request.TopP == 0: request.TopP = 0.9 + + if request.TopK == 0: + request.TopK = 40 max_tokens = 200 if request.Tokens > 0: max_tokens = request.Tokens - inputs = self.tokenizer(request.Prompt, return_tensors="pt") + prompt = request.Prompt + if not request.Prompt and request.UseTokenizerTemplate and request.Messages: + prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True) + + eos_token_id = self.tokenizer.eos_token_id + if request.StopPrompts: + eos_token_id = [] + for word in request.StopPrompts: + eos_token_id.append(self.tokenizer.convert_tokens_to_ids(word)) + + inputs = self.tokenizer(prompt, return_tensors="pt") if self.CUDA: inputs = inputs.to("cuda") if XPU and self.OV == False: @@ -235,7 +249,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): top_k=request.TopK, do_sample=True, attention_mask=inputs["attention_mask"], - eos_token_id=self.tokenizer.eos_token_id, + eos_token_id=eos_token_id, pad_token_id=self.tokenizer.eos_token_id, streamer=streamer) thread=Thread(target=self.model.generate, kwargs=config) @@ -264,7 +278,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): top_k=request.TopK, do_sample=True, attention_mask=inputs["attention_mask"], - eos_token_id=self.tokenizer.eos_token_id, + eos_token_id=eos_token_id, pad_token_id=self.tokenizer.eos_token_id) generated_text = self.tokenizer.batch_decode(outputs[:, 
inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0] From 38c9abed8bef6cb9c9b7c29ee1b92f86e5317ec7 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 21 Apr 2024 18:35:30 +0200 Subject: [PATCH 0342/2895] :arrow_up: Update ggerganov/llama.cpp (#2089) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ea81b535..761c76d6 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=0e4802b2ecbaab04b4f829fde4a3096ca19c84b5 +CPPLLAMA_VERSION?=b8109bc0139f15a5b321909f47510b89dca47ffc # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 228bc4903f7eed3d384e1094255a8159153158a2 Mon Sep 17 00:00:00 2001 From: Dave Date: Sun, 21 Apr 2024 16:39:17 -0400 Subject: [PATCH 0343/2895] fix: action-tmate detached (#2092) connect-timeout-seconds works best with `detached: true` Signed-off-by: Dave --- .github/workflows/test.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9eb4f084..f50479e1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -123,6 +123,7 @@ jobs: if: ${{ failure() }} uses: mxschmitt/action-tmate@v3.18 with: + detached: true connect-timeout-seconds: 180 limit-access-to-actor: true @@ -177,6 +178,7 @@ jobs: if: ${{ failure() }} uses: mxschmitt/action-tmate@v3.18 with: + detached: true connect-timeout-seconds: 180 limit-access-to-actor: true @@ -213,5 +215,6 @@ jobs: if: ${{ failure() }} uses: mxschmitt/action-tmate@v3.18 with: + detached: true connect-timeout-seconds: 180 - limit-access-to-actor: true \ No newline at end of file + limit-access-to-actor: true From f3f6535aad2c899afbc71b273ebd9282438b7814 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 21 Apr 2024 22:39:28 +0200 Subject: [PATCH 0344/2895] fix: rename fiber entrypoint from http/api to http/app (#2096) Signed-off-by: Ettore Di Giacinto Co-authored-by: Dave --- core/http/{api.go => app.go} | 0 core/http/{api_test.go => app_test.go} | 0 core/http/{apt_suite_test.go => http_suite_test.go} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename core/http/{api.go => app.go} (100%) rename core/http/{api_test.go => app_test.go} (100%) rename core/http/{apt_suite_test.go => http_suite_test.go} (100%) diff --git a/core/http/api.go b/core/http/app.go similarity index 100% rename from core/http/api.go rename to core/http/app.go diff --git a/core/http/api_test.go b/core/http/app_test.go similarity index 100% rename from core/http/api_test.go rename to core/http/app_test.go diff --git a/core/http/apt_suite_test.go b/core/http/http_suite_test.go similarity index 100% rename from core/http/apt_suite_test.go rename to core/http/http_suite_test.go From 220958a87c17cf6f1c82dcb4f3f3f8756ea3881d Mon Sep 17 00:00:00 2001 From: Ikko Eltociear Ashimine Date: Mon, 22 Apr 2024 13:34:59 +0900 Subject: [PATCH 0345/2895] fix: typo in models.go (#2099) --- core/cli/models.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/cli/models.go b/core/cli/models.go index 62ef366b..6615e21d 100644 --- a/core/cli/models.go +++ b/core/cli/models.go @@ -25,7 +25,7 @@ type ModelsInstall struct { } type ModelsCMD struct { 
- List ModelsList `cmd:"" help:"List the models avaiable in your galleries" default:"withargs"`
+ List ModelsList `cmd:"" help:"List the models available in your galleries" default:"withargs"`
  Install ModelsInstall `cmd:"" help:"Install a model from the gallery"`
 }

From 729378ca98b66ef84921c8f0eb40208e0c2721a5 Mon Sep 17 00:00:00 2001
From: jtwolfe
Date: Mon, 22 Apr 2024 23:47:51 +1000
Subject: [PATCH 0346/2895] AMD/ROCm Documentation update + formatting fix
 (#2100)

* Update aio-images.md

Signed-off-by: jtwolfe

* Update aio-images.md

Signed-off-by: jtwolfe

* Update aio-images.md

Signed-off-by: jtwolfe

* Update GPU-acceleration.md

Signed-off-by: jtwolfe

* Update GPU-acceleration.md

Signed-off-by: jtwolfe

* Update GPU-acceleration.md

Signed-off-by: jtwolfe

* Update GPU-acceleration.md

Signed-off-by: jtwolfe

* Update GPU-acceleration.md

Signed-off-by: jtwolfe

* Update GPU-acceleration.md

Signed-off-by: jtwolfe

---------

Signed-off-by: jtwolfe
---
 .../content/docs/features/GPU-acceleration.md | 139 +++++++++++++++++-
 docs/content/docs/reference/aio-images.md     |  15 +-
 2 files changed, 146 insertions(+), 8 deletions(-)

diff --git a/docs/content/docs/features/GPU-acceleration.md b/docs/content/docs/features/GPU-acceleration.md
index aa931f07..b382309e 100644
--- a/docs/content/docs/features/GPU-acceleration.md
+++ b/docs/content/docs/features/GPU-acceleration.md
@@ -12,7 +12,7 @@ Section under construction
 This section contains instruction on how to use LocalAI with GPU acceleration.
 
 {{% alert icon="⚡" context="warning" %}}
-For accelleration for AMD or Metal HW there are no specific container images, see the [build]({{%relref "docs/getting-started/build#Acceleration" %}})
+Acceleration for AMD or Metal HW is still in development; for additional details see the [build]({{%relref "docs/getting-started/build#Acceleration" %}})
 
 {{% /alert %}}
 
@@ -110,6 +110,143 @@ llama_model_load_internal: total VRAM used: 1598 MB
 llama_init_from_file: kv self size = 512.00 MB
 ```
 
+## ROCm (AMD) acceleration
+
+There are a limited number of tested configurations for ROCm systems; however, most newer dedicated consumer-grade GPUs appear to be supported under the current ROCm 6 implementation.
+
+Due to the nature of ROCm it is best to run all implementations in containers, as this limits the number of packages required for installation on the host system. Compatibility and package versions for dependencies across all variations of OS must be tested independently if desired; please refer to the [build]({{%relref "docs/getting-started/build#Acceleration" %}}) documentation.
+
+### Requirements
+
+- `ROCm 6.x.x` compatible GPU/accelerator
+- OS: `Ubuntu` (22.04, 20.04), `RHEL` (9.3, 9.2, 8.9, 8.8), `SLES` (15.5, 15.4)
+- Installed to host: `amdgpu-dkms` and `rocm` >=6.0.0 as per ROCm documentation.
+
+### Recommendations
+
+- Do not use on a system running Wayland.
+- If running with Xorg, do not use a GPU assigned for compute for desktop rendering.
+- Ensure at least 100GB of free space on the disk hosting the container runtime and storing images prior to installation.
+
+### Limitations
+
+Verification testing of ROCm compatibility with the integrated backends is ongoing.
+Please note the following list of verified backends and devices.
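+As a quick sanity check before consulting the table below, you can confirm that the host ROCm install sees your accelerator and report its LLVM target. A minimal sketch, assuming the standard `rocminfo` and `rocm-smi` utilities shipped with ROCm are on your `PATH`:
+
+```bash
+# List detected agents and their gfx targets (the LLVM target, e.g. gfx906)
+rocminfo | grep -E 'Name:\s+gfx'
+# Show GPU status to confirm the kernel driver is loaded and the device is visible
+rocm-smi
+```
+
+If no `gfx` target shows up here, the container will not be able to offload either.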
+
+### Verified
+
+The devices in the following list have been tested with `hipblas` images running `ROCm 6.0.0`.
+
+| Backend | Verified | Devices |
+| ---- | ---- | ---- |
+| llama.cpp | yes | Radeon VII (gfx906) |
+| diffusers | yes | Radeon VII (gfx906) |
+| piper | yes | Radeon VII (gfx906) |
+| whisper | no | none |
+| autogptq | no | none |
+| bark | no | none |
+| coqui | no | none |
+| transformers | no | none |
+| exllama | no | none |
+| exllama2 | no | none |
+| mamba | no | none |
+| petals | no | none |
+| sentencetransformers | no | none |
+| transformers-musicgen | no | none |
+| vall-e-x | no | none |
+| vllm | no | none |
+
+**You can help by expanding this list.**
+
+### System Prep
+
+1. Check that your GPU LLVM target is compatible with the version of ROCm. This can be found in the [LLVM Docs](https://llvm.org/docs/AMDGPUUsage.html).
+2. Check which ROCm version is compatible with your LLVM target and your chosen OS (pay special attention to supported kernel versions). See the following for compatibility: [ROCm 6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/reference/system-requirements.html) or [ROCm 6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html)
+3. Install your chosen version of `dkms` and `rocm` (it is recommended that the native package manager be used for this process on any OS, as version changes are executed more easily via this method if updates are required). Take care to restart after installing `amdgpu-dkms` and before installing `rocm`; for details see the installation documentation for your chosen OS ([6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/native-install/index.html) or [6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/how-to/native-install/index.html))
+4. Deploy. Yes, it's that easy.
+
+#### Setup Example (Docker/containerd)
+
+The following are examples of the ROCm-specific configuration elements required.
+
+```yaml
+# docker-compose.yaml
+    # For full functionality select a non-'core' image; version locking the image is recommended for debug purposes.
+    image: quay.io/go-skynet/local-ai:master-aio-gpu-hipblas
+    environment:
+      - DEBUG=true
+      # If your GPU is not already included in the current list of default targets, the following build details are required.
+      - REBUILD=true
+      - BUILD_TYPE=hipblas
+      - GPU_TARGETS=gfx906 # Example for Radeon VII
+    devices:
+      # AMD GPUs only require the following devices to be passed through to the container for offloading to occur.
+      - /dev/dri
+      - /dev/kfd
+```
+
+The same can also be executed as a `run` command for your container runtime:
+
+```
+docker run \
+ -e DEBUG=true \
+ -e REBUILD=true \
+ -e BUILD_TYPE=hipblas \
+ -e GPU_TARGETS=gfx906 \
+ --device /dev/dri \
+ --device /dev/kfd \
+ quay.io/go-skynet/local-ai:master-aio-gpu-hipblas
+```
+
+Please ensure you add all other required environment variables, port forwardings, etc. to your `compose` file or `run` command.
+
+The rebuild process will take some time to complete when deploying these containers; it is recommended that you `pull` the image prior to deployment, as depending on the version these images may be ~20GB in size.
+
+#### Example (k8s) (Advanced Deployment/WIP)
+
+For k8s deployments there is an additional step required before deployment: the [ROCm/k8s-device-plugin](https://artifacthub.io/packages/helm/amd-gpu-helm/amd-gpu) must be deployed first.
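+As an illustrative sketch only, deploying that plugin via Helm looks roughly like the following. The chart coordinates below are assumptions; take the exact repository URL and chart name from the Artifact Hub page linked above:
+
+```bash
+# Hypothetical chart coordinates -- verify against the Artifact Hub listing
+helm repo add amd-gpu-helm https://rocm.github.io/k8s-device-plugin/helm
+helm repo update
+helm install amd-gpu amd-gpu-helm/amd-gpu --namespace kube-system
+```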
+For any k8s environment, following the documentation provided by AMD for the ROCm project should be successful. If you use RKE2 or OpenShift, it is recommended that you deploy the SUSE- or Red Hat-provided version of this resource to ensure compatibility.
+After this has been completed, the [helm chart from go-skynet](https://github.com/go-skynet/helm-charts) can be configured and deployed mostly unedited.
+
+The following are details of the changes that should be made to ensure proper function.
+While these details may be configurable in the `values.yaml`, development of this Helm chart is ongoing and it is subject to change.
+
+The following details indicate the final state of the LocalAI deployment relevant to GPU function.
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {NAME}-local-ai
+...
+spec:
+  ...
+  template:
+    ...
+    spec:
+      containers:
+        - env:
+            - name: HIP_VISIBLE_DEVICES
+              value: '0'
+              # This variable indicates the devices available to the container (0:device1 1:device2 2:device3) etc.
+              # For multiple devices (say device 1 and 3) the value would be equivalent to HIP_VISIBLE_DEVICES="0,2"
+              # Please take note of this when an iGPU is present in the host system, as compatibility is not assured.
+          ...
+          resources:
+            limits:
+              amd.com/gpu: '1'
+            requests:
+              amd.com/gpu: '1'
+```
+
+This configuration has been tested on a 'custom' cluster managed by SUSE Rancher that was deployed on top of Ubuntu 22.04.4; certification of other configurations is ongoing and compatibility is not guaranteed.
+
+### Notes
+
+- When installing the ROCm kernel driver on your system, ensure that you are installing an equal or newer version than that which is currently implemented in LocalAI (6.0.0 at time of writing).
+- AMD documentation indicates that this will ensure functionality; however, your mileage may vary depending on the GPU and distro you are using.
+- If you encounter an `Error 413` when attempting to upload an audio file or image for whisper or llava/bakllava on a k8s deployment, note that the ingress for your deployment may require the annotation `nginx.ingress.kubernetes.io/proxy-body-size: "25m"` to allow larger uploads. This may be included in future versions of the helm chart.
+
 ## Intel acceleration (sycl)
 
 ### Requirements

diff --git a/docs/content/docs/reference/aio-images.md b/docs/content/docs/reference/aio-images.md
index 40f01f06..b5253ee4 100644
--- a/docs/content/docs/reference/aio-images.md
+++ b/docs/content/docs/reference/aio-images.md
@@ -9,13 +9,14 @@ All-In-One images are images that come pre-configured with a set of models and b
 
 In the AIO images there are models configured with the names of OpenAI models, however, they are really backed by Open Source models.
You can find the table below -| Category | Model name | Real model | -| Text Generation | `gpt-4` | `phi-2`(CPU) or `hermes-2-pro-mistral`(GPU) | -| Multimodal | `gpt-4-vision-preview` | `bakllava`(CPU) or `llava-1.6-mistral`(GPU) | -| Text generation | `stablediffusion` | `stablediffusion`(CPU) `dreamshaper-8` (GPU) | -| Audio transcription | `whisper-1` | `whisper` with the `whisper-base` model | -| Text to Audio | `tts-1` | the `en-us-amy-low.onnx` model with `rhasspy` | -| Embeddings | `text-embedding-ada-002` | | +| Category | Model name | Real model (CPU) | Real model (GPU) | +| ---- | ---- | ---- | ---- | +| Text Generation | `gpt-4` | `phi-2` | `hermes-2-pro-mistral` | +| Multimodal Vision | `gpt-4-vision-preview` | `bakllava` | `llava-1.6-mistral` | +| Image Generation | `stablediffusion` | `stablediffusion` | `dreamshaper-8` | +| Speech to Text | `whisper-1` | `whisper` with `whisper-base` model | <= same | +| Text to Speech | `tts-1` | `en-us-amy-low.onnx` from `rhasspy/piper` | <= same | +| Embeddings | `text-embedding-ada-002` | `all-MiniLM-L6-v2` in Q4 | `all-MiniLM-L6-v2` | ## Usage From b6f0e80d54f3a0ab50688e0c391258a206f677d5 Mon Sep 17 00:00:00 2001 From: Taikono-Himazin Date: Mon, 22 Apr 2024 23:37:13 +0900 Subject: [PATCH 0347/2895] Update text-generation.md (#2095) Signed-off-by: Taikono-Himazin --- docs/content/docs/features/text-generation.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/content/docs/features/text-generation.md b/docs/content/docs/features/text-generation.md index c11894e7..3f3f0b56 100644 --- a/docs/content/docs/features/text-generation.md +++ b/docs/content/docs/features/text-generation.md @@ -257,6 +257,10 @@ parameters: # swap_space: 2 # Uncomment to specify the maximum length of a sequence (including prompt and output) # max_model_len: 32768 +# Uncomment and specify the number of Tensor divisions. +# Allows you to partition and run large models. Performance gains are limited. +# https://github.com/vllm-project/vllm/issues/1435 +# tensor_parallel_size: 2 ``` The backend will automatically download the required files in order to run the model. 
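+To illustrate the option added above, a minimal model configuration enabling vLLM tensor parallelism could look like the following sketch. The model name is a placeholder (any vLLM-supported model id works), and `tensor_parallel_size` should normally match the number of GPUs you want to shard across:
+
+```yaml
+name: vllm-parallel          # hypothetical model name
+backend: vllm
+parameters:
+    model: "facebook/opt-125m"  # placeholder; any vLLM-supported model
+# Split the weights across two GPUs; performance gains are limited (see the issue linked above).
+tensor_parallel_size: 2
+```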
@@ -356,4 +360,4 @@ template: completion: | {{.Input}} -``` \ No newline at end of file +``` From bd507678be6a45e81e1fb9f96e7620c6c4eb162f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 23 Apr 2024 00:04:57 +0200 Subject: [PATCH 0348/2895] :arrow_up: Update docs version mudler/LocalAI (#2105) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index 6a618115..55eebaeb 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.12.4" + "version": "null" } From 0d8bf91699a9deee596011cb1c30be29ec680685 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 Apr 2024 09:22:58 +0200 Subject: [PATCH 0349/2895] feat: Galleries UI (#2104) * WIP: add models to webui Signed-off-by: Ettore Di Giacinto * Register routes Signed-off-by: Ettore Di Giacinto * fix: don't cache models Signed-off-by: Ettore Di Giacinto * small fixups Signed-off-by: Ettore Di Giacinto * fix: fixup multiple installs (strings.Clone) Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- README.md | 2 +- core/config/backend_config.go | 6 +- core/http/app.go | 6 +- core/http/elements/gallery.go | 171 +++++++++++++++++++++++++ core/http/endpoints/localai/welcome.go | 6 +- core/http/routes/localai.go | 3 +- core/http/routes/ui.go | 107 ++++++++++++++++ core/http/routes/welcome.go | 6 +- core/http/views/models.html | 40 ++++++ core/http/views/partials/head.html | 67 +++++++++- core/http/views/partials/navbar.html | 1 + docs/content/docs/overview.md | 2 +- go.mod | 5 +- go.sum | 2 + main.go | 2 +- pkg/downloader/progress.go | 13 ++ pkg/downloader/uri.go | 4 +- pkg/gallery/models.go | 4 +- pkg/gallery/op.go | 5 +- pkg/startup/model_preload.go | 2 +- 20 files changed, 431 insertions(+), 23 deletions(-) create mode 100644 core/http/elements/gallery.go create mode 100644 core/http/routes/ui.go create mode 100644 core/http/views/models.html diff --git a/README.md b/README.md index e28e3cb0..0b32febd 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ [![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai) -**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. 
+**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler). ## 🔥🔥 Hot topics / Roadmap diff --git a/core/config/backend_config.go b/core/config/backend_config.go index dfc216dc..64182e75 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -512,7 +512,7 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error { for i, config := range cl.configs { // Download files and verify their SHA - for _, file := range config.DownloadFiles { + for i, file := range config.DownloadFiles { log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename) if err := utils.VerifyPath(file.Filename, modelPath); err != nil { @@ -521,7 +521,7 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error { // Create file path filePath := filepath.Join(modelPath, file.Filename) - if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil { + if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil { return err } } @@ -535,7 +535,7 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error { // check if file exists if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) { - err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status) + err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", 0, 0, status) if err != nil { return err } diff --git a/core/http/app.go b/core/http/app.go index 1061627f..21652dd9 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -186,10 +186,14 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles) + galleryService := services.NewGalleryService(appConfig.ModelPath) + galleryService.Start(appConfig.Context, cl) + routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig, auth) - routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, auth) + routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService, auth) routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth) routes.RegisterPagesRoutes(app, cl, ml, appConfig, auth) + routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth) // Define a custom 404 handler // Note: keep this at the bottom! 
diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go new file mode 100644 index 00000000..370ca82d --- /dev/null +++ b/core/http/elements/gallery.go @@ -0,0 +1,171 @@ +package elements + +import ( + "fmt" + + "github.com/chasefleming/elem-go" + "github.com/chasefleming/elem-go/attrs" + "github.com/go-skynet/LocalAI/pkg/gallery" +) + +func DoneProgress(uid string) string { + return elem.Div( + attrs.Props{}, + elem.H3( + attrs.Props{ + "role": "status", + "id": "pblabel", + "tabindex": "-1", + "autofocus": "", + }, + elem.Text("Installation completed"), + ), + ).Render() +} + +func ErrorProgress(err string) string { + return elem.Div( + attrs.Props{}, + elem.H3( + attrs.Props{ + "role": "status", + "id": "pblabel", + "tabindex": "-1", + "autofocus": "", + }, + elem.Text("Error"+err), + ), + ).Render() +} + +func ProgressBar(progress string) string { + return elem.Div(attrs.Props{ + "class": "progress", + "role": "progressbar", + "aria-valuemin": "0", + "aria-valuemax": "100", + "aria-valuenow": "0", + "aria-labelledby": "pblabel", + }, + elem.Div(attrs.Props{ + "id": "pb", + "class": "progress-bar", + "style": "width:" + progress + "%", + }), + ).Render() +} + +func StartProgressBar(uid, progress string) string { + if progress == "" { + progress = "0" + } + return elem.Div(attrs.Props{ + "hx-trigger": "done", + "hx-get": "/browse/job/" + uid, + "hx-swap": "outerHTML", + "hx-target": "this", + }, + elem.H3( + attrs.Props{ + "role": "status", + "id": "pblabel", + "tabindex": "-1", + "autofocus": "", + }, + elem.Text("Installing"), + // This is a simple example of how to use the HTMLX library to create a progress bar that updates every 600ms. + elem.Div(attrs.Props{ + "hx-get": "/browse/job/progress/" + uid, + "hx-trigger": "every 600ms", + "hx-target": "this", + "hx-swap": "innerHTML", + }, + elem.Raw(ProgressBar(progress)), + ), + ), + ).Render() +} + +func ListModels(models []*gallery.GalleryModel) string { + modelsElements := []elem.Node{} + span := func(s string) elem.Node { + return elem.Span( + attrs.Props{ + "class": "float-right inline-block bg-green-500 text-white py-1 px-3 rounded-full text-xs", + }, + elem.Text(s), + ) + } + installButton := func(m *gallery.GalleryModel) elem.Node { + return elem.Button( + attrs.Props{ + "class": "float-right inline-block rounded bg-primary px-6 pb-2 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + // post the Model ID as param + "hx-post": "/browse/install/model/" + fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name), + }, + elem.Text("Install"), + ) + } + + descriptionDiv := func(m *gallery.GalleryModel) elem.Node { + + return elem.Div( + attrs.Props{ + "class": "p-6", + }, + elem.H5( + attrs.Props{ + "class": "mb-2 text-xl font-medium leading-tight", + }, + elem.Text(m.Name), + ), + elem.P( + attrs.Props{ + "class": "mb-4 text-base", + }, + elem.Text(m.Description), + ), + ) + } + + actionDiv := func(m *gallery.GalleryModel) elem.Node { + return elem.Div( + attrs.Props{ + "class": "px-6 pt-4 pb-2", + }, + elem.Span( + attrs.Props{ + "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2", + }, + elem.Text("Repository: 
"+m.Gallery.Name), + ), + elem.If(m.Installed, span("Installed"), installButton(m)), + ) + } + + for _, m := range models { + modelsElements = append(modelsElements, + elem.Div( + attrs.Props{ + "class": "me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface p-2", + }, + elem.Div( + attrs.Props{ + "class": "p-6", + }, + descriptionDiv(m), + actionDiv(m), + // elem.If(m.Installed, span("Installed"), installButton(m)), + + // elem.If(m.Installed, span("Installed"), span("Not Installed")), + ), + ), + ) + } + + wrapper := elem.Div(attrs.Props{ + "class": "dark grid grid-cols-1 grid-rows-1 md:grid-cols-2 ", + }, modelsElements...) + + return wrapper.Render() +} diff --git a/core/http/endpoints/localai/welcome.go b/core/http/endpoints/localai/welcome.go index fd3e6230..291422c6 100644 --- a/core/http/endpoints/localai/welcome.go +++ b/core/http/endpoints/localai/welcome.go @@ -3,12 +3,16 @@ package localai import ( "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/internal" + "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" ) func WelcomeEndpoint(appConfig *config.ApplicationConfig, - models []string, backendConfigs []config.BackendConfig) func(*fiber.Ctx) error { + cl *config.BackendConfigLoader, ml *model.ModelLoader) func(*fiber.Ctx) error { return func(c *fiber.Ctx) error { + models, _ := ml.ListModels() + backendConfigs := cl.GetAllBackendConfigs() + summary := fiber.Map{ "Title": "LocalAI API - " + internal.PrintableVersion(), "Version": internal.PrintableVersion(), diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index 2651a53e..6415c894 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -14,13 +14,12 @@ func RegisterLocalAIRoutes(app *fiber.App, cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, + galleryService *services.GalleryService, auth func(*fiber.Ctx) error) { app.Get("/swagger/*", swagger.HandlerDefault) // default // LocalAI API endpoints - galleryService := services.NewGalleryService(appConfig.ModelPath) - galleryService.Start(appConfig.Context, cl) modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint()) diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go new file mode 100644 index 00000000..b9ccd89a --- /dev/null +++ b/core/http/routes/ui.go @@ -0,0 +1,107 @@ +package routes + +import ( + "fmt" + "html/template" + "strings" + + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/http/elements" + "github.com/go-skynet/LocalAI/core/services" + "github.com/go-skynet/LocalAI/pkg/gallery" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" + "github.com/google/uuid" +) + +func RegisterUIRoutes(app *fiber.App, + cl *config.BackendConfigLoader, + ml *model.ModelLoader, + appConfig *config.ApplicationConfig, + galleryService *services.GalleryService, + auth func(*fiber.Ctx) error) { + + // Show the Models page + app.Get("/browse", auth, func(c *fiber.Ctx) error { + models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) + + summary := fiber.Map{ + "Title": "LocalAI API - Models", + "Models": template.HTML(elements.ListModels(models)), + // "ApplicationConfig": appConfig, + } + + // Render index + return c.Render("views/models", 
summary) + }) + + // HTMX: return the model details + // https://htmx.org/examples/active-search/ + app.Post("/browse/search/models", auth, func(c *fiber.Ctx) error { + form := struct { + Search string `form:"search"` + }{} + if err := c.BodyParser(&form); err != nil { + return c.Status(fiber.StatusBadRequest).SendString(err.Error()) + } + + models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) + + filteredModels := []*gallery.GalleryModel{} + for _, m := range models { + if strings.Contains(m.Name, form.Search) { + filteredModels = append(filteredModels, m) + } + } + + return c.SendString(elements.ListModels(filteredModels)) + }) + + // https://htmx.org/examples/progress-bar/ + app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error { + galleryID := strings.Clone(c.Params("id")) // strings.Clone is required! + + id, err := uuid.NewUUID() + if err != nil { + return err + } + + uid := id.String() + + op := gallery.GalleryOp{ + Id: uid, + GalleryName: galleryID, + Galleries: appConfig.Galleries, + } + go func() { + galleryService.C <- op + }() + + return c.SendString(elements.StartProgressBar(uid, "0")) + }) + + // https://htmx.org/examples/progress-bar/ + app.Get("/browse/job/progress/:uid", auth, func(c *fiber.Ctx) error { + jobUID := c.Params("uid") + + status := galleryService.GetStatus(jobUID) + if status == nil { + //fmt.Errorf("could not find any status for ID") + return c.SendString(elements.ProgressBar("0")) + } + + if status.Progress == 100 { + c.Set("HX-Trigger", "done") + return c.SendString(elements.ProgressBar("100")) + } + if status.Error != nil { + return c.SendString(elements.ErrorProgress(status.Error.Error())) + } + + return c.SendString(elements.ProgressBar(fmt.Sprint(status.Progress))) + }) + + app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error { + return c.SendString(elements.DoneProgress(c.Params("uid"))) + }) +} diff --git a/core/http/routes/welcome.go b/core/http/routes/welcome.go index 29b9e586..6b600d2d 100644 --- a/core/http/routes/welcome.go +++ b/core/http/routes/welcome.go @@ -13,11 +13,7 @@ func RegisterPagesRoutes(app *fiber.App, appConfig *config.ApplicationConfig, auth func(*fiber.Ctx) error) { - models, _ := ml.ListModels() - backendConfigs := cl.GetAllBackendConfigs() - if !appConfig.DisableWelcomePage { - app.Get("/", auth, localai.WelcomeEndpoint(appConfig, models, backendConfigs)) + app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml)) } - } diff --git a/core/http/views/models.html b/core/http/views/models.html new file mode 100644 index 00000000..63c6bba0 --- /dev/null +++ b/core/http/views/models.html @@ -0,0 +1,40 @@ + + +{{template "views/partials/head" .}} + + +
+<body class="bg-black text-white">
+    {{template "views/partials/navbar" .}}
+    <div class="container mx-auto px-4 flex-grow">
+        <div class="header text-center py-12">
+            <h1 class="text-5xl font-bold">Welcome to your LocalAI instance!</h1>
+        </div>
+        <div class="text-center">
+            <p class="mt-4 text-lg">The FOSS alternative to OpenAI, Claude, ...</p>
+            <a href="https://localai.io" target="_blank">Documentation</a>
+        </div>
+        <div class="models mt-12">
+            <h2 class="text-center text-3xl font-semibold">Available models from repositories</h2>
+            <input type="search" name="search" placeholder="Search models..."
+                   hx-post="/browse/search/models"
+                   hx-trigger="input changed delay:500ms, search"
+                   hx-target="#search-results">
+            <div id="search-results">{{.Models}}</div>
+        </div>
+    </div>
+    {{template "views/partials/footer" .}}
+ + + diff --git a/core/http/views/partials/head.html b/core/http/views/partials/head.html index 59cdea33..9dbfecdb 100644 --- a/core/http/views/partials/head.html +++ b/core/http/views/partials/head.html @@ -3,11 +3,76 @@ {{.Title}} - + + + + + \ No newline at end of file diff --git a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html index c3d3223f..36332ed2 100644 --- a/core/http/views/partials/navbar.html +++ b/core/http/views/partials/navbar.html @@ -9,6 +9,7 @@ diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md index 5224bc49..f0f59494 100644 --- a/docs/content/docs/overview.md +++ b/docs/content/docs/overview.md @@ -56,7 +56,7 @@ icon = "info" -**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families and architectures. Does not require GPU. It is maintained by [mudler](https://github.com/mudler). +**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families and architectures. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler). ## Start LocalAI diff --git a/go.mod b/go.mod index 0bf9aa02..9485383e 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,8 @@ module github.com/go-skynet/LocalAI -go 1.21 +go 1.21.1 + +toolchain go1.22.2 require ( github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf @@ -71,6 +73,7 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v4 v4.1.3 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/chasefleming/elem-go v0.25.0 // indirect github.com/containerd/continuity v0.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dlclark/regexp2 v1.8.1 // indirect diff --git a/go.sum b/go.sum index 55fdaf06..b68834b2 100644 --- a/go.sum +++ b/go.sum @@ -37,6 +37,8 @@ github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/charmbracelet/glamour v0.7.0 h1:2BtKGZ4iVJCDfMF229EzbeR1QRKLWztO9dMtjmqZSng= github.com/charmbracelet/glamour v0.7.0/go.mod h1:jUMh5MeihljJPQbJ/wf4ldw2+yBP59+ctV36jASy7ps= +github.com/chasefleming/elem-go v0.25.0 h1:LYzr1auk39Bh3bdKloArOFV7sOBnOfSOKxsg58eWL0Q= +github.com/chasefleming/elem-go v0.25.0/go.mod h1:hz73qILBIKnTgOujnSMtEj20/epI+f6vg71RUilJAA4= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= diff --git a/main.go b/main.go index 9976906b..04f13d3f 100644 --- a/main.go +++ b/main.go @@ -72,7 +72,7 @@ Version: ${version} kong.Vars{ "basepath": kong.ExpandPath("."), "remoteLibraryURL": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml", - "galleries": `[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml"}]`, + 
"galleries": `[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]`, "version": internal.PrintableVersion(), }, ) diff --git a/pkg/downloader/progress.go b/pkg/downloader/progress.go index 6806f586..6cd6132b 100644 --- a/pkg/downloader/progress.go +++ b/pkg/downloader/progress.go @@ -5,6 +5,8 @@ import "hash" type progressWriter struct { fileName string total int64 + fileNo int + totalFiles int written int64 downloadStatus func(string, string, string, float64) hash hash.Hash @@ -16,6 +18,17 @@ func (pw *progressWriter) Write(p []byte) (n int, err error) { if pw.total > 0 { percentage := float64(pw.written) / float64(pw.total) * 100 + if pw.totalFiles > 1 { + // This is a multi-file download + // so we need to adjust the percentage + // to reflect the progress of the whole download + // This is the file pw.fileNo of pw.totalFiles files. We assume that + // the files before successfully downloaded. + percentage = percentage / float64(pw.totalFiles) + if pw.fileNo > 1 { + percentage += float64(pw.fileNo-1) * 100 / float64(pw.totalFiles) + } + } //log.Debug().Msgf("Downloading %s: %s/%s (%.2f%%)", pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage) pw.downloadStatus(pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage) } else { diff --git a/pkg/downloader/uri.go b/pkg/downloader/uri.go index b678ae0d..46ccd6a1 100644 --- a/pkg/downloader/uri.go +++ b/pkg/downloader/uri.go @@ -136,7 +136,7 @@ func removePartialFile(tmpFilePath string) error { return nil } -func DownloadFile(url string, filePath, sha string, downloadStatus func(string, string, string, float64)) error { +func DownloadFile(url string, filePath, sha string, fileN, total int, downloadStatus func(string, string, string, float64)) error { url = ConvertURL(url) // Check if the file already exists _, err := os.Stat(filePath) @@ -209,6 +209,8 @@ func DownloadFile(url string, filePath, sha string, downloadStatus func(string, fileName: tmpFilePath, total: resp.ContentLength, hash: sha256.New(), + fileNo: fileN, + totalFiles: total, downloadStatus: downloadStatus, } _, err = io.Copy(io.MultiWriter(outFile, progress), resp.Body) diff --git a/pkg/gallery/models.go b/pkg/gallery/models.go index 10caedee..59971bbc 100644 --- a/pkg/gallery/models.go +++ b/pkg/gallery/models.go @@ -102,7 +102,7 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides } // Download files and verify their SHA - for _, file := range config.Files { + for i, file := range config.Files { log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename) if err := utils.VerifyPath(file.Filename, basePath); err != nil { @@ -111,7 +111,7 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides // Create file path filePath := filepath.Join(basePath, file.Filename) - if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, downloadStatus); err != nil { + if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.Files), downloadStatus); err != nil { return err } } diff --git a/pkg/gallery/op.go b/pkg/gallery/op.go index 99796812..73d748bf 100644 --- a/pkg/gallery/op.go +++ b/pkg/gallery/op.go @@ -1,11 +1,12 @@ package gallery type GalleryOp struct { - Req GalleryModel Id string - Galleries []Gallery GalleryName string ConfigURL string + + Req GalleryModel + Galleries []Gallery } type GalleryOpStatus struct { diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go index b09516a7..d267d846 100644 --- 
a/pkg/startup/model_preload.go +++ b/pkg/startup/model_preload.go @@ -54,7 +54,7 @@ func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, model // check if file exists if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) { modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml" - err := downloader.DownloadFile(url, modelDefinitionFilePath, "", func(fileName, current, total string, percent float64) { + err := downloader.DownloadFile(url, modelDefinitionFilePath, "", 0, 0, func(fileName, current, total string, percent float64) { utils.DisplayDownloadFunction(fileName, current, total, percent) }) if err != nil { From 8e36fe9b6fc51c0a13a18302b647655b52fff0aa Mon Sep 17 00:00:00 2001 From: fakezeta Date: Tue, 23 Apr 2024 18:42:17 +0200 Subject: [PATCH 0350/2895] Transformers Backend: max_tokens adherence to OpenAI API (#2108) max token adherence to OpenAI API improve adherence to OpenAI API when max tokens is omitted or equal to 0 in the request --- .../python/transformers/transformers_server.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index 1b38a956..90053ed5 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -159,6 +159,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): quantization_config=quantization, device_map=device_map, torch_dtype=compute) + if request.ContextSize > 0: + self.max_tokens = request.ContextSize + else: + self.max_tokens = self.model.config.max_position_embeddings + self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True) self.XPU = False @@ -217,10 +222,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if request.TopK == 0: request.TopK = 40 - max_tokens = 200 - if request.Tokens > 0: - max_tokens = request.Tokens - prompt = request.Prompt if not request.Prompt and request.UseTokenizerTemplate and request.Messages: prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True) @@ -232,6 +233,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): eos_token_id.append(self.tokenizer.convert_tokens_to_ids(word)) inputs = self.tokenizer(prompt, return_tensors="pt") + + if request.Tokens > 0: + max_tokens = request.Tokens + else: + max_tokens = self.max_tokens - inputs["input_ids"].size()[inputs["input_ids"].dim()-1] + if self.CUDA: inputs = inputs.to("cuda") if XPU and self.OV == False: From 3411e072ca8d5c4a34267287ded4a2ad03bfb36d Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Tue, 23 Apr 2024 11:43:00 -0500 Subject: [PATCH 0351/2895] Fix cleanup sonarqube findings (#2106) * fix: update dockerignore and gitignore to exclude sonarqube work dir Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: remove useless equality check Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: use sonarqube Dockerfile recommendations Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --------- Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- .dockerignore | 5 ++++- .gitignore | 3 +++ Dockerfile | 23 +++++++++++----------- core/http/endpoints/openai/assistant.go | 26 ++++++++++++------------- 4 files changed, 31 insertions(+), 26 deletions(-) diff --git a/.dockerignore b/.dockerignore 
index 2c394c48..ea2ea6b2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,4 +5,7 @@ models examples/chatbot-ui/models examples/rwkv/models examples/**/models -Dockerfile* \ No newline at end of file +Dockerfile* + +# SonarQube +.scannerwork \ No newline at end of file diff --git a/.gitignore b/.gitignore index f1f860e9..9338b0c4 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,6 @@ prepare *.pb.go *pb2.py *pb2_grpc.py + +# SonarQube +.scannerwork \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 805ac3a6..4bc8b35e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ ARG BASE_IMAGE=ubuntu:22.04 ARG GRPC_BASE_IMAGE=${BASE_IMAGE} # extras or core -FROM ${BASE_IMAGE} as requirements-core +FROM ${BASE_IMAGE} AS requirements-core USER root @@ -24,7 +24,7 @@ RUN apt-get update && \ apt-get install -y ca-certificates curl python3-pip unzip && apt-get clean # Install Go -RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -C /usr/local -xz +RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz ENV PATH $PATH:/usr/local/go/bin # Install grpc compilers @@ -80,7 +80,7 @@ RUN test -n "$TARGETARCH" \ ################################### ################################### -FROM requirements-core as requirements-extras +FROM requirements-core AS requirements-extras RUN apt install -y gpg && \ curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ @@ -105,7 +105,7 @@ RUN if [ ! -e /usr/bin/python ]; then \ ################################### ################################### -FROM ${GRPC_BASE_IMAGE} as grpc +FROM ${GRPC_BASE_IMAGE} AS grpc ARG MAKEFLAGS ARG GRPC_VERSION=v1.58.0 @@ -121,16 +121,15 @@ RUN apt-get update && \ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc -RUN cd grpc && \ - mkdir -p cmake/build && \ - cd cmake/build && \ - cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \ +WORKDIR /build/grpc/cmake/build + +RUN cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \ make ################################### ################################### -FROM requirements-${IMAGE_TYPE} as builder +FROM requirements-${IMAGE_TYPE} AS builder ARG GO_TAGS="stablediffusion tts" ARG GRPC_BACKENDS @@ -168,9 +167,11 @@ RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build COPY --from=grpc /build/grpc ./grpc/ -RUN cd /build/grpc/cmake/build && make install +WORKDIR /build/grpc/cmake/build +RUN make install # Rebuild with defaults backends +WORKDIR /build RUN make build RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \ @@ -288,7 +289,7 @@ RUN mkdir -p /build/models # Define the health check command HEALTHCHECK --interval=1m --timeout=10m --retries=10 \ - CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1 + CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1 VOLUME /build/models EXPOSE 8080 diff --git a/core/http/endpoints/openai/assistant.go b/core/http/endpoints/openai/assistant.go index dceb3789..c1efd8bd 100644 --- a/core/http/endpoints/openai/assistant.go +++ b/core/http/endpoints/openai/assistant.go @@ -455,21 +455,19 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model for i, assistant := range Assistants { if assistant.ID == assistantID { for j, fileId := range assistant.FileIDs { - if fileId == fileId { - Assistants[i].FileIDs = append(Assistants[i].FileIDs[:j], Assistants[i].FileIDs[j+1:]...) 
+ Assistants[i].FileIDs = append(Assistants[i].FileIDs[:j], Assistants[i].FileIDs[j+1:]...) - // Check if the file exists in the assistantFiles slice - for i, assistantFile := range AssistantFiles { - if assistantFile.ID == fileId { - // Remove the file from the assistantFiles slice - AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...) - utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles) - return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{ - ID: fileId, - Object: "assistant.file.deleted", - Deleted: true, - }) - } + // Check if the file exists in the assistantFiles slice + for i, assistantFile := range AssistantFiles { + if assistantFile.ID == fileId { + // Remove the file from the assistantFiles slice + AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...) + utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles) + return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{ + ID: fileId, + Object: "assistant.file.deleted", + Deleted: true, + }) } } } From d344daf129e5d4504ce29ada434b6e6b1025ce31 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 Apr 2024 18:43:25 +0200 Subject: [PATCH 0352/2895] feat(models-ui): minor visual enhancements (#2109) Show image if present, URL, tags, and better display buttons Signed-off-by: Ettore Di Giacinto --- core/http/elements/gallery.go | 96 ++++++++++++++++++++++++++++++----- core/http/routes/ui.go | 10 ++-- core/http/views/models.html | 18 +++---- 3 files changed, 96 insertions(+), 28 deletions(-) diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index 370ca82d..405f42ae 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -86,6 +86,18 @@ func StartProgressBar(uid, progress string) string { ).Render() } +func cardSpan(text, icon string) elem.Node { + return elem.Span( + attrs.Props{ + "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2", + }, + elem.I(attrs.Props{ + "class": icon + " pr-2", + }), + elem.Text(text), + ) +} + func ListModels(models []*gallery.GalleryModel) string { modelsElements := []elem.Node{} span := func(s string) elem.Node { @@ -99,10 +111,17 @@ func ListModels(models []*gallery.GalleryModel) string { installButton := func(m *gallery.GalleryModel) elem.Node { return elem.Button( attrs.Props{ - "class": "float-right inline-block rounded bg-primary px-6 pb-2 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "data-twe-ripple-init": "", + "data-twe-ripple-color": "light", + "class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", // post the Model ID as param "hx-post": "/browse/install/model/" + fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name), 
}, + elem.I( + attrs.Props{ + "class": "fa-solid fa-download pr-2", + }, + ), elem.Text("Install"), ) } @@ -111,7 +130,7 @@ func ListModels(models []*gallery.GalleryModel) string { return elem.Div( attrs.Props{ - "class": "p-6", + "class": "p-6 text-surface dark:text-white", }, elem.H5( attrs.Props{ @@ -129,42 +148,93 @@ func ListModels(models []*gallery.GalleryModel) string { } actionDiv := func(m *gallery.GalleryModel) elem.Node { + nodes := []elem.Node{ + cardSpan("Repository: "+m.Gallery.Name, "fa-brands fa-git-alt"), + } + + if m.License != "" { + nodes = append(nodes, + cardSpan("License: "+m.License, "fas fa-book"), + ) + } + + for _, tag := range m.Tags { + nodes = append(nodes, + cardSpan(tag, "fas fa-tag"), + ) + } + + for i, url := range m.URLs { + nodes = append(nodes, + elem.A( + attrs.Props{ + "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2", + "href": url, + "target": "_blank", + }, + elem.I(attrs.Props{ + "class": "fas fa-link pr-2", + }), + elem.Text("Link #"+fmt.Sprintf("%d", i+1)), + )) + } + return elem.Div( attrs.Props{ "class": "px-6 pt-4 pb-2", }, - elem.Span( + elem.P( attrs.Props{ - "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2", + "class": "mb-4 text-base", }, - elem.Text("Repository: "+m.Gallery.Name), + nodes..., ), elem.If(m.Installed, span("Installed"), installButton(m)), ) } for _, m := range models { + + elems := []elem.Node{} + + if m.Icon != "" { + elems = append(elems, + + elem.Div(attrs.Props{ + "class": "flex justify-center items-center", + }, + elem.A(attrs.Props{ + "href": "#!", + // "class": "justify-center items-center", + }, + elem.Img(attrs.Props{ + // "class": "rounded-t-lg object-fit object-center h-96", + "class": "rounded-t-lg max-h-48 max-w-96 object-cover", + "src": m.Icon, + }), + ), + )) + } + + elems = append(elems, descriptionDiv(m), actionDiv(m)) modelsElements = append(modelsElements, elem.Div( attrs.Props{ - "class": "me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface p-2", + "class": " me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2", }, elem.Div( attrs.Props{ - "class": "p-6", + // "class": "p-6", }, - descriptionDiv(m), - actionDiv(m), - // elem.If(m.Installed, span("Installed"), installButton(m)), - - // elem.If(m.Installed, span("Installed"), span("Not Installed")), + elems..., ), ), ) } wrapper := elem.Div(attrs.Props{ - "class": "dark grid grid-cols-1 grid-rows-1 md:grid-cols-2 ", + "class": "dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark", + //"class": "block rounded-lg bg-white shadow-secondary-1 dark:bg-surface-dark", }, modelsElements...) 
return wrapper.Render() diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index b9ccd89a..c64ec5ff 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -26,8 +26,9 @@ func RegisterUIRoutes(app *fiber.App, models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) summary := fiber.Map{ - "Title": "LocalAI API - Models", - "Models": template.HTML(elements.ListModels(models)), + "Title": "LocalAI - Models", + "Models": template.HTML(elements.ListModels(models)), + "Repositories": appConfig.Galleries, // "ApplicationConfig": appConfig, } @@ -49,7 +50,10 @@ func RegisterUIRoutes(app *fiber.App, filteredModels := []*gallery.GalleryModel{} for _, m := range models { - if strings.Contains(m.Name, form.Search) { + if strings.Contains(m.Name, form.Search) || + strings.Contains(m.Description, form.Search) || + strings.Contains(m.Gallery.Name, form.Search) || + strings.Contains(strings.Join(m.Tags, ","), form.Search) { filteredModels = append(filteredModels, m) } } diff --git a/core/http/views/models.html b/core/http/views/models.html index 63c6bba0..be3c1bef 100644 --- a/core/http/views/models.html +++ b/core/http/views/models.html @@ -7,20 +7,14 @@ {{template "views/partials/navbar" .}}
-        Welcome to your LocalAI instance!
-        The FOSS alternative to OpenAI, Claude, ...
-        Documentation
-        Available models from repositories
+        🖼️ Available models from {{ len .Repositories }} repositories
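A side note on how this patch builds the page: the badges and buttons are not authored in the HTML view at all — they are assembled in Go with the elem-go library (the chasefleming/elem-go dependency added to go.mod earlier in this series) and handed to the view pre-rendered, via the template.HTML(elements.ListModels(models)) call visible in the ui.go hunk. A minimal, self-contained sketch of the pattern, using only elem-go calls that already appear in the gallery.go diff; the badge text and icon class below are arbitrary example values:

package main

import (
	"fmt"

	"github.com/chasefleming/elem-go"
	"github.com/chasefleming/elem-go/attrs"
)

// cardSpan mirrors the helper added in core/http/elements/gallery.go:
// a rounded badge made of a Font Awesome icon followed by a text label.
func cardSpan(text, icon string) elem.Node {
	return elem.Span(
		attrs.Props{
			"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2",
		},
		elem.I(attrs.Props{"class": icon + " pr-2"}),
		elem.Text(text),
	)
}

func main() {
	// Render() produces the HTML fragment as a string; the route handler
	// wraps fragments like this in template.HTML before passing them on.
	fmt.Println(cardSpan("License: apache-2.0", "fas fa-book").Render())
}

Building fragments in Go keeps the htmx wiring (the hx-post install button, the progress bar) next to the code that knows the gallery state, at the cost of embedding Tailwind class strings in Go sources.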
+ + Date: Tue, 23 Apr 2024 19:35:45 +0200 Subject: [PATCH 0353/2895] feat(gallery): add llama3, hermes, phi-3, and others (#2110) Also adds embeddings and llava models Signed-off-by: Ettore Di Giacinto --- gallery/codellama.yaml | 21 + gallery/dreamshaper.yaml | 26 ++ gallery/hermes-2-pro-mistral.yaml | 81 ++++ gallery/index.yaml | 668 ++++++++++++++++++++---------- gallery/llama3-instruct.yaml | 64 +++ gallery/llava.yaml | 32 ++ gallery/phi-2-chat.yaml | 50 +++ gallery/phi-2-orange.yaml | 33 ++ gallery/phi-3-chat.yaml | 31 ++ gallery/piper.yaml | 15 + gallery/sentencetransformers.yaml | 12 + 11 files changed, 804 insertions(+), 229 deletions(-) create mode 100644 gallery/codellama.yaml create mode 100644 gallery/dreamshaper.yaml create mode 100644 gallery/hermes-2-pro-mistral.yaml create mode 100644 gallery/llama3-instruct.yaml create mode 100644 gallery/llava.yaml create mode 100644 gallery/phi-2-chat.yaml create mode 100644 gallery/phi-2-orange.yaml create mode 100644 gallery/phi-3-chat.yaml create mode 100644 gallery/piper.yaml create mode 100644 gallery/sentencetransformers.yaml diff --git a/gallery/codellama.yaml b/gallery/codellama.yaml new file mode 100644 index 00000000..1b773ed6 --- /dev/null +++ b/gallery/codellama.yaml @@ -0,0 +1,21 @@ +name: "codellama" +license: llama2 + +description: | + Code Llama is a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 34 billion parameters. This model is designed for general code synthesis and understanding. + +urls: +- https://huggingface.co/TheBloke/CodeLlama-7B-GGUF +- https://huggingface.co/meta-llama/CodeLlama-7b-hf + +tags: +- llm +- gguf +- gpu +- cpu + +config_file: | + backend: llama-cpp + context_size: 4096 + f16: true + mmap: true \ No newline at end of file diff --git a/gallery/dreamshaper.yaml b/gallery/dreamshaper.yaml new file mode 100644 index 00000000..894ae0cf --- /dev/null +++ b/gallery/dreamshaper.yaml @@ -0,0 +1,26 @@ +name: "dreamshaper" +icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png +license: other + +description: | + A text-to-image model that uses Stable Diffusion 1.5 to generate images from text prompts. This model is DreamShaper model by Lykon. + +urls: +- https://civitai.com/models/4384/dreamshaper + +tags: +- text-to-image +- stablediffusion +- sd-1.5 +- gpu + +config_file: | + backend: diffusers + step: 25 + f16: true + + diffusers: + pipeline_type: StableDiffusionPipeline + cuda: true + enable_parameters: "negative_prompt,num_inference_steps" + scheduler_type: "k_dpmpp_2m" diff --git a/gallery/hermes-2-pro-mistral.yaml b/gallery/hermes-2-pro-mistral.yaml new file mode 100644 index 00000000..5a79d5cb --- /dev/null +++ b/gallery/hermes-2-pro-mistral.yaml @@ -0,0 +1,81 @@ +name: "hermes-2-pro-mistral" +icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png +license: apache-2.0 + +description: | + Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house. 
+
+  This new version of Hermes maintains its excellent general task and conversation capabilities - but also excels at Function Calling, JSON Structured Outputs, and has improved on several other metrics as well, scoring a 90% on our function calling evaluation built in partnership with Fireworks.AI, and an 81% on our structured JSON Output evaluation.
+
+  Hermes Pro takes advantage of a special system prompt and multi-turn function calling structure with a new chatml role in order to make function calling reliable and easy to parse. Learn more about prompting below.
+
+  This work was a collaboration between Nous Research, @interstellarninja, and Fireworks.AI
+
+  Learn more about the function calling on our github repo here: https://github.com/NousResearch/Hermes-Function-Calling/tree/main
+
+urls:
+- https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF
+
+tags:
+- llm
+- gguf
+- gpu
+- cpu
+
+config_file: |
+  mmap: true
+  parameters:
+    model: Hermes-2-Pro-Mistral-7B.Q6_K.gguf
+
+  template:
+    chat_message: |
+      <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
+      {{- if .FunctionCall }}
+      <tool_call>
+      {{- else if eq .RoleName "tool" }}
+      <tool_response>
+      {{- end }}
+      {{- if .Content}}
+      {{.Content }}
+      {{- end }}
+      {{- if .FunctionCall}}
+      {{toJson .FunctionCall}}
+      {{- end }}
+      {{- if .FunctionCall }}
+      </tool_call>
+      {{- else if eq .RoleName "tool" }}
+      </tool_response>
+      {{- end }}
+      <|im_end|>
+    # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
+    function: |
+      <|im_start|>system
+      You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+      <tools>
+      {{range .Functions}}
+      {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+      {{end}}
+      </tools>
+      Use the following pydantic model json schema for each tool call you will make:
+      {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
+      For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+      <tool_call>
+      {'arguments': <args-dict>, 'name': <function-name>}
+      </tool_call>
+      <|im_end|>
+      {{.Input -}}
+      <|im_start|>assistant
+
+    chat: |
+      {{.Input -}}
+      <|im_start|>assistant
+    completion: |
+      {{.Input}}
+  context_size: 4096
+  f16: true
+  stopwords:
+  - <|im_end|>
+  - <dummy32000>
+  - "\n</tool_call>"
+  - "\n\n\n"
+
diff --git a/gallery/index.yaml b/gallery/index.yaml
index 6b882768..4582838e 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1,503 +1,713 @@
+## LLM
+
+### START LLAMA3
+- &llama3
+  url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
+  name: "llama3-8b-instruct"
+  overrides:
+    parameters:
+      model: Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
+  files:
+  - filename: Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
+    sha256: cce3ba85525027d0fff520cad053d5a6f32c293382a40b3d55a650282c267787
+    uri: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
+
+### START LLaVa
+- &llava
+  url: "github:mudler/LocalAI/gallery/llava.yaml@master"
+  name: "llava-1.6-vicuna"
+  overrides:
+    mmproj: mmproj-vicuna7b-f16.gguf
+    parameters:
+      model: vicuna-7b-q5_k.gguf
+  files:
+  - filename: vicuna-7b-q5_k.gguf
+    uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf
+  - filename: mmproj-vicuna7b-f16.gguf
+    uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf
+- <<: *llava
+  name: "llava-1.6-mistral"
+  overrides:
+    mmproj: llava-v1.6-7b-mmproj-f16.gguf
+    parameters:
+      model: llava-v1.6-mistral-7b.gguf
+  files:
+  - filename: llava-v1.6-mistral-7b.gguf
+    sha256: 31826170ffa2e8080bbcd74cac718f906484fd5a59895550ef94c1baa4997595
+    uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q6_K.gguf
+  - filename: llava-v1.6-7b-mmproj-f16.gguf
+    sha256: 00205ee8a0d7a381900cd031e43105f86aa0d8c07bf329851e85c71a26632d16
+    uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
+- <<: *llava
+  name: "llava-1.5"
+  overrides:
+    mmproj: llava-v1.5-7b-mmproj-Q8_0.gguf
+    parameters:
+      model: llava-v1.5-7b-Q4_K.gguf
+  files:
+  - filename: llava-v1.5-7b-Q4_K.gguf
+    sha256: c91ebf0a628ceb25e374df23ad966cc1bf1514b33fecf4f0073f9619dec5b3f9
+    uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-Q4_K.gguf
+  - filename: llava-v1.5-7b-mmproj-Q8_0.gguf
+    sha256: 09c230de47f6f843e4841656f7895cac52c6e7ec7392acb5e8527de8b775c45a
+    uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf
+### START Phi-2
+- &phi-2
+  url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master"
+  name: "phi-2-chat:Q8_0"
+  overrides:
+    parameters:
+      model: phi-2-layla-v1-chatml-Q8_0.gguf
+  files:
+  - filename: "phi-2-layla-v1-chatml-Q8_0.gguf"
+    sha256: "0cf542a127c2c835066a78028009b7eddbaf773cc2a26e1cb157ce5e09c1a2e0"
+    uri: "huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf"
+- <<: *phi-2
+  name: "phi-2-chat"
+  overrides:
+    parameters:
+      model: phi-2-layla-v1-chatml-Q4_K.gguf
+  files:
+  - filename: "phi-2-layla-v1-chatml-Q4_K.gguf"
+    sha256: "b071e5624b60b8911f77261398802c4b4079c6c689e38e2ce75173ed62bc8a48"
+    uri: "huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q4_K.gguf"
+- <<: *phi-2
+  name: "phi-2-orange"
+  overrides:
+    parameters:
+      model: phi-2-orange.Q4_0.gguf
+  files:
+  - filename: "phi-2-orange.Q4_0.gguf"
+    sha256: "49cb710ae688e1b19b1b299087fa40765a0cd677e3afcc45e5f7ef6750975dcf"
+    uri: "huggingface://TheBloke/phi-2-orange-GGUF/phi-2-orange.Q4_0.gguf"
+### START Phi-3
+- &phi-3
+  url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master"
+  name: "phi-3-mini-4k-instruct"
+  overrides:
+    parameters:
+      model: Phi-3-mini-4k-instruct-q4.gguf
+  files:
+  - filename: "Phi-3-mini-4k-instruct-q4.gguf"
+    sha256: "4fed7364ee3e0c7cb4fe0880148bfdfcd1b630981efa0802a6b62ee52e7da97e"
+    uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf"
+- <<: *phi-3
+  name: "phi-3-mini-4k-instruct:fp16"
+  overrides:
+    parameters:
+      model: Phi-3-mini-4k-instruct-fp16.gguf
+  files:
+  - filename: "Phi-3-mini-4k-instruct-fp16.gguf"
+    sha256: "ad9f8ff11cd096115adc8ff50befa22fc3da2718672ddd2ab30faccd70488605"
+    uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf"
+### START Hermes-2-Pro-Mistral
+- &hermes-2-pro-mistral
+  url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
+  name: "hermes-2-pro-mistral"
+  overrides:
+    parameters:
+      model: Hermes-2-Pro-Mistral-7B.Q4_0.gguf
+  files:
+  - filename: "Hermes-2-Pro-Mistral-7B.Q4_0.gguf"
+    sha256: "f446c3125026f7af6757dd097dda02280adc85e908c058bd6f1c41a118354745"
+    uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q4_0.gguf"
+- <<: *hermes-2-pro-mistral
+  name: "hermes-2-pro-mistral:Q6_K"
+  overrides:
+    parameters:
+      model: Hermes-2-Pro-Mistral-7B.Q6_K.gguf
+  files:
+  - filename: "Hermes-2-Pro-Mistral-7B.Q6_K.gguf"
+    sha256: "40adc3b227bc36764de148fdda4df5df385adc06650d58d4dbe726ee0214eeff"
+    uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf"
+- <<: *hermes-2-pro-mistral
+  name: "hermes-2-pro-mistral:Q8_0"
+  overrides:
+    parameters:
+      model: Hermes-2-Pro-Mistral-7B.Q8_0.gguf
+  files:
+  - filename: "Hermes-2-Pro-Mistral-7B.Q8_0.gguf"
+    sha256: "b6d95d7ec9a395b7568cc94b0447fd4f90b6f69d6e44794b1fbb84e3f732baca"
+    uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q8_0.gguf"
+### END Hermes-2-Pro-Mistral
+
+### START Codellama
+- &codellama
+  url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
+  name: "codellama-7b"
+  overrides:
+    parameters:
+      model: codellama-7b.Q4_0.gguf
+  files:
+  - filename: "codellama-7b.Q4_0.gguf"
+    sha256: "33052f6dd41436db2f83bd48017b6fff8ce0184e15a8a227368b4230f1da97b5"
+    uri: "huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_0.gguf"
+
+### START Embeddings
+- &sentencentransformers
+  name: "all-MiniLM-L6-v2"
+  url: "github:mudler/LocalAI/gallery/sentencetransformers.yaml@master"
+  overrides:
+    parameters:
+      model: all-MiniLM-L6-v2
+
+### START Image generation
+- &diffusers
+  name: dreamshaper
+  url: "github:mudler/LocalAI/gallery/dreamshaper.yaml@master"
+  overrides:
+    parameters:
+      model: DreamShaper_8_pruned.safetensors
+  files:
+  - filename: DreamShaper_8_pruned.safetensors
+    uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
+    sha256: 879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd
+
 ## Whisper
 - url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master"
   name: "whisper-1"
-  license: other
+
 ## Bert
embeddings - url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master" name: "bert-embeddings" - license: other + - url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master" name: "text-embedding-ada-002" - license: other + ## Stable Diffusion - url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master name: stablediffusion - license: other + ## Tiny Dream - url: github:mudler/LocalAI/gallery/tinydream.yaml@master name: tinydream - license: other + ## Piper TTS -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-kathleen-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: en-us-kathleen-low.onnx files: - filename: voice-en-us-kathleen-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-kathleen-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ca-upc_ona-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: ca-upc_ona-x-low.onnx files: - filename: voice-ca-upc_ona-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ca-upc_ona-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ca-upc_pau-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: ca-upc_pau-x-low.onnx files: - filename: voice-ca-upc_pau-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ca-upc_pau-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-da-nst_talesyntese-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: da-nst_talesyntese-medium.onnx files: - filename: voice-da-nst_talesyntese-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-da-nst_talesyntese-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-eva_k-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: de-eva_k-x-low.onnx files: - filename: voice-de-eva_k-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-eva_k-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-karlsson-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: de-karlsson-low.onnx files: - filename: voice-de-karlsson-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-karlsson-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-kerstin-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: de-kerstin-low.onnx files: - filename: voice-de-kerstin-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-kerstin-low.tar.gz -- url: 
github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-pavoque-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: de-pavoque-low.onnx files: - filename: voice-de-pavoque-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-pavoque-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-ramona-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: de-ramona-low.onnx files: - filename: voice-de-ramona-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-ramona-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-thorsten-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: de-thorsten-low.onnx files: - filename: voice-de-thorsten-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-thorsten-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-el-gr-rapunzelina-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: el-gr-rapunzelina-low.onnx files: - filename: voice-el-gr-rapunzelina-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-el-gr-rapunzelina-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-gb-alan-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-gb-alan-low.onnx files: - filename: voice-en-gb-alan-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-gb-alan-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-gb-southern_english_female-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-gb-southern_english files: - filename: voice-en-gb-southern_english_female-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-gb-southern_english_female-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-amy-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-us-amy-low.onnx files: - filename: voice-en-us-amy-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-danny-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-us-danny-low.onnx files: - filename: voice-en-us-danny-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: 
github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-kathleen-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-us-kathleen-low.onnx files: - filename: voice-en-us-kathleen-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-kathleen-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-lessac-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-us-lessac-low.onnx files: - filename: voice-en-us-lessac-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-lessac-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-lessac-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-us-lessac-medium.onnx files: - filename: voice-en-us-lessac-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-lessac-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-libritts-high - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-us-libritts-high.onnx files: - filename: voice-en-us-libritts-high.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-libritts-high.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-ryan-high - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-us-ryan-high.onnx files: - filename: voice-en-us-ryan-high.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-high.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-ryan-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-us-ryan-low.onnx files: - filename: voice-en-us-ryan-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-ryan-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-us-ryan-medium.onnx files: - filename: voice-en-us-ryan-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us_lessac - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: en-us-lessac.onnx files: - filename: voice-en-us_lessac.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us_lessac.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-es-carlfm-x-low - license: other 
- urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: es-carlfm-x-low.onnx files: - filename: voice-es-carlfm-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-carlfm-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-es-mls_10246-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: es-mls_10246-low.onnx files: - filename: voice-es-mls_10246-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-mls_10246-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-es-mls_9972-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: es-mls_9972-low.onnx files: - filename: voice-es-mls_9972-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-mls_9972-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fi-harri-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: fi-harri-low.onnx files: - filename: voice-fi-harri-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fi-harri-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-gilles-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: fr-gilles-low.onnx files: - filename: voice-fr-gilles-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-gilles-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-mls_1840-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: fr-mls_1840-low.onnx files: - filename: voice-fr-mls_1840-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-mls_1840-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-siwis-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: fr-siwis-low.onnx files: - filename: voice-fr-siwis-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-siwis-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-siwis-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: fr-siwis-medium.onnx files: - filename: voice-fr-siwis-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-siwis-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-bui-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: is-bui-medium.onnx files: - filename: 
voice-is-bui-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-bui-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-salka-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: is-salka-medium.onnx files: - filename: voice-is-salka-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-salka-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-steinn-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: is-steinn-medium.onnx files: - filename: voice-is-steinn-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-steinn-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-ugla-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: is-ugla-medium.onnx files: - filename: voice-is-ugla-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-ugla-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-it-riccardo_fasol-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: it-riccardo_fasol-x-low.onnx files: - filename: voice-it-riccardo_fasol-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-it-riccardo_fasol-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-kk-iseke-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: kk-iseke-x-low.onnx files: - filename: voice-kk-iseke-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-iseke-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-kk-issai-high - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: kk-issai-high.onnx files: - filename: voice-kk-issai-high.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-issai-high.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-kk-raya-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: kk-raya-x-low.onnx files: - filename: voice-kk-raya-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-raya-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ne-google-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: ne-google-medium.onnx files: - filename: voice-ne-google-medium.tar.gz uri: 
https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ne-google-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ne-google-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: ne-google-x-low.onnx files: - filename: voice-ne-google-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ne-google-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-mls_5809-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: nl-mls_5809-low.onnx files: - filename: voice-nl-mls_5809-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-mls_5809-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-mls_7432-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: nl-mls_7432-low.onnx files: - filename: voice-nl-mls_7432-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-mls_7432-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-nathalie-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: nl-nathalie-x-low.onnx files: - filename: voice-nl-nathalie-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-nathalie-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-rdh-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: nl-rdh-medium.onnx files: - filename: voice-nl-rdh-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-rdh-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-rdh-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: nl-rdh-x-low.onnx files: - filename: voice-nl-rdh-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-rdh-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-no-talesyntese-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: no-talesyntese-medium.onnx files: - filename: voice-no-talesyntese-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-no-talesyntese-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-pl-mls_6892-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: pl-mls_6892-low.onnx files: - filename: voice-pl-mls_6892-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-pl-mls_6892-low.tar.gz -- url: 
github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-pt-br-edresson-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: pt-br-edresson-low.onnx files: - filename: voice-pt-br-edresson-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-pt-br-edresson-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ru-irinia-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: ru-irinia-medium.onnx files: - filename: voice-ru-irinia-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ru-irinia-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-sv-se-nst-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: sv-se-nst-medium.onnx files: - filename: voice-sv-se-nst-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-sv-se-nst-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-uk-lada-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: uk-lada-x-low.onnx files: - filename: voice-uk-lada-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-uk-lada-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-vi-25hours-single-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: vi-25hours-single-low.onnx files: - filename: voice-vi-25hours-single-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-vi-25hours-single-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-vi-vivos-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: vi-vivos-x-low.onnx files: - filename: voice-vi-vivos-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-vi-vivos-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-zh-cn-huayan-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: zh-cn-huayan-x-low.onnx files: - filename: voice-zh-cn-huayan-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh-cn-huayan-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-zh_CN-huayan-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: zh_CN-huayan-medium.onnx files: - filename: voice-zh_CN-huayan-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh_CN-huayan-medium.tar.gz \ No newline at end of file diff --git a/gallery/llama3-instruct.yaml 
b/gallery/llama3-instruct.yaml new file mode 100644 index 00000000..4e29e740 --- /dev/null +++ b/gallery/llama3-instruct.yaml @@ -0,0 +1,64 @@ +name: "llama3-instruct" +license: llama3 + +description: | + Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Further, in developing these models, we took great care to optimize helpfulness and safety. + + Model developers Meta + + Variations Llama 3 comes in two sizes — 8B and 70B parameters — in pre-trained and instruction tuned variants. + + Input Models input text only. + + Output Models generate text and code only. + + Model Architecture Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. +urls: +- https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct + +tags: +- llm +- gguf +- gpu +- cpu + +config_file: | + mmap: true + template: + chat_message: | + <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|> + + {{ if .FunctionCall -}} + Function call: + {{ else if eq .RoleName "tool" -}} + Function response: + {{ end -}} + {{ if .Content -}} + {{.Content -}} + {{ else if .FunctionCall -}} + {{ toJson .FunctionCall -}} + {{ end -}} + <|eot_id|> + function: | + <|start_header_id|>system<|end_header_id|> + + You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: + + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + + Use the following pydantic model json schema for each tool call you will make: + {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|> + Function call: + chat: | + <|begin_of_text|>{{.Input }} + <|start_header_id|>assistant<|end_header_id|> + completion: | + {{.Input}} + context_size: 8192 + f16: true + stopwords: + - <|im_end|> + - + - "<|eot_id|>" diff --git a/gallery/llava.yaml b/gallery/llava.yaml new file mode 100644 index 00000000..159ae34c --- /dev/null +++ b/gallery/llava.yaml @@ -0,0 +1,32 @@ +name: "llava" +license: apache-2.0 + +description: | + LLaVA represents a novel end-to-end trained large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding, achieving impressive chat capabilities mimicking spirits of the multimodal GPT-4 and setting a new state-of-the-art accuracy on Science QA. 
+ +urls: +- https://llava-vl.github.io/ + +tags: +- llm +- multimodal +- gguf +- gpu +- cpu + +config_file: | + backend: llama-cpp + context_size: 4096 + f16: true + + mmap: true + roles: + user: "USER:" + assistant: "ASSISTANT:" + system: "SYSTEM:" + + template: + chat: | + A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. + {{.Input}} + ASSISTANT: diff --git a/gallery/phi-2-chat.yaml b/gallery/phi-2-chat.yaml new file mode 100644 index 00000000..3370311f --- /dev/null +++ b/gallery/phi-2-chat.yaml @@ -0,0 +1,50 @@ +name: "phi-2-chatml" +license: mit + +description: | + Phi-2 fine-tuned by the OpenHermes 2.5 dataset optimised for multi-turn conversation and character impersonation. + + The dataset has been pre-processed by doing the following: + + - remove all refusals + - remove any mention of AI assistant + - split any multi-turn dialog generated in the dataset into multi-turn conversations records + - added nfsw generated conversations from the Teatime dataset + + Developed by: l3utterfly + Funded by: Layla Network + Model type: Phi + Language(s) (NLP): English + License: MIT + Finetuned from model: Phi-2 + + +urls: +- https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml +- https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml-gguf + +tags: +- llm +- gguf +- gpu +- cpu + +config_file: | + mmap: true + # parameters: + # model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf + + template: + chat_message: | + <|im_start|>{{ .RoleName }} + {{.Content}}<|im_end|> + chat: | + {{.Input}} + <|im_start|>assistant + completion: | + {{.Input}} + context_size: 4096 + f16: true + stopwords: + - <|im_end|> + diff --git a/gallery/phi-2-orange.yaml b/gallery/phi-2-orange.yaml new file mode 100644 index 00000000..9800f8da --- /dev/null +++ b/gallery/phi-2-orange.yaml @@ -0,0 +1,33 @@ +name: "phi-2-orange" +license: mit +icon: "https://huggingface.co/rhysjones/phi-2-orange/resolve/main/phi-2-orange.jpg" +description: | + A two-step finetune of Phi-2, with a bit of zest. + + There is an updated model at rhysjones/phi-2-orange-v2 which has higher evals, if you wish to test. +urls: +- https://huggingface.co/rhysjones/phi-2-orange +- https://huggingface.co/TheBloke/phi-2-orange-GGUF + +tags: +- llm +- gguf +- gpu +- cpu + +config_file: | + mmap: true + template: + chat_message: | + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} + {{if .Content}}{{.Content}}{{end}}<|im_end|> + chat: | + {{.Input}} + <|im_start|>assistant + completion: | + {{.Input}} + context_size: 4096 + f16: true + stopwords: + - <|im_end|> + - diff --git a/gallery/phi-3-chat.yaml b/gallery/phi-3-chat.yaml new file mode 100644 index 00000000..24dbc20f --- /dev/null +++ b/gallery/phi-3-chat.yaml @@ -0,0 +1,31 @@ +name: "phi-3-chat" +license: mit + +description: | + The Phi-3-Mini-4K-Instruct is a 3.8B parameters, lightweight, state-of-the-art open model trained with the Phi-3 datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Phi-3 family with the Mini version in two variants 4K and 128K which is the context length (in tokens) it can support. 
The model has undergone a post-training process that incorporates both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks testing common sense, language understanding, math, code, long context and logical reasoning, Phi-3 Mini-4K-Instruct showcased a robust and state-of-the-art performance among models with less than 13 billion parameters. + +urls: +- https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf + +tags: +- llm +- gguf +- gpu +- cpu + +config_file: | + mmap: true + template: + chat_message: | + <|{{ .RoleName }}|> + {{.Content}}<|end|> + chat: | + {{.Input}} + <|assistant|> + completion: | + {{.Input}} + context_size: 4096 + f16: true + stopwords: + - <|end|> + diff --git a/gallery/piper.yaml b/gallery/piper.yaml new file mode 100644 index 00000000..d759ba92 --- /dev/null +++ b/gallery/piper.yaml @@ -0,0 +1,15 @@ +config_file: | + backend: piper +icon: https://github.com/rhasspy/piper/raw/master/etc/logo.png +license: mit + +urls: + - https://github.com/rhasspy/piper + +description: | + A fast, local neural text to speech system that sounds great and is optimized for the Raspberry Pi 4. Piper is used in a variety of [projects](https://github.com/rhasspy/piper#people-using-piper). + +tags: +- tts +- text-to-speech +- cpu diff --git a/gallery/sentencetransformers.yaml b/gallery/sentencetransformers.yaml new file mode 100644 index 00000000..1830cce3 --- /dev/null +++ b/gallery/sentencetransformers.yaml @@ -0,0 +1,12 @@ +name: "sentencetransformers" +description: | + This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar texts are closer and can efficiently be found using cosine similarity.
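The cosine-similarity retrieval described here can be exercised through LocalAI's OpenAI-compatible embeddings endpoint. A rough sketch, assuming a local instance on port 8080 and the all-MiniLM-L6-v2 name this backend is given in the gallery index further down:

curl http://localhost:8080/v1/embeddings \
  -H "Content-Type: application/json" \
  -d '{"model": "all-MiniLM-L6-v2", "input": "LocalAI can compute sentence embeddings"}'

Embedding two texts this way and dividing the dot product of the returned vectors by the product of their norms yields the cosine similarity the description refers to.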
+ urls: + - https://github.com/UKPLab/sentence-transformers +tags: +- gpu +- cpu +- embeddings + +config_file: | + backend: sentencetransformers \ No newline at end of file From 55778b35fff7909927e7699a8232eceec0f5c340 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 Apr 2024 19:47:42 +0200 Subject: [PATCH 0354/2895] fix(gallery): move metadata where it belongs Signed-off-by: Ettore Di Giacinto --- gallery/bert-embeddings.yaml | 6 +- gallery/codellama.yaml | 14 -- gallery/dreamshaper.yaml | 13 -- gallery/hermes-2-pro-mistral.yaml | 21 -- gallery/index.yaml | 375 ++++++++++++++++++++++++------ gallery/llama3-instruct.yaml | 21 -- gallery/llava.yaml | 13 -- gallery/phi-2-chat.yaml | 31 --- gallery/phi-2-orange.yaml | 15 -- gallery/phi-3-chat.yaml | 13 -- gallery/piper.yaml | 13 -- gallery/sentencetransformers.yaml | 8 - gallery/stablediffusion.yaml | 6 - gallery/tinydream.yaml | 6 - gallery/whisper-base.yaml | 6 - 15 files changed, 310 insertions(+), 251 deletions(-) diff --git a/gallery/bert-embeddings.yaml b/gallery/bert-embeddings.yaml index 0798bf54..01f05f33 100644 --- a/gallery/bert-embeddings.yaml +++ b/gallery/bert-embeddings.yaml @@ -1,9 +1,5 @@ name: "bert-embeddings" -license: "Apache 2.0" -urls: -- https://huggingface.co/skeskinen/ggml -description: | - Bert model that can be used for embeddings + config_file: | parameters: model: bert-MiniLM-L6-v2q4_0.bin diff --git a/gallery/codellama.yaml b/gallery/codellama.yaml index 1b773ed6..a4c3233f 100644 --- a/gallery/codellama.yaml +++ b/gallery/codellama.yaml @@ -1,18 +1,4 @@ name: "codellama" -license: llama2 - -description: | - Code Llama is a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 34 billion parameters. This model is designed for general code synthesis and understanding. - -urls: -- https://huggingface.co/TheBloke/CodeLlama-7B-GGUF -- https://huggingface.co/meta-llama/CodeLlama-7b-hf - -tags: -- llm -- gguf -- gpu -- cpu config_file: | backend: llama-cpp diff --git a/gallery/dreamshaper.yaml b/gallery/dreamshaper.yaml index 894ae0cf..219a1e53 100644 --- a/gallery/dreamshaper.yaml +++ b/gallery/dreamshaper.yaml @@ -1,18 +1,5 @@ name: "dreamshaper" -icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png -license: other -description: | - A text-to-image model that uses Stable Diffusion 1.5 to generate images from text prompts. This model is DreamShaper model by Lykon. - -urls: -- https://civitai.com/models/4384/dreamshaper - -tags: -- text-to-image -- stablediffusion -- sd-1.5 -- gpu config_file: | backend: diffusers diff --git a/gallery/hermes-2-pro-mistral.yaml b/gallery/hermes-2-pro-mistral.yaml index 5a79d5cb..d4771a11 100644 --- a/gallery/hermes-2-pro-mistral.yaml +++ b/gallery/hermes-2-pro-mistral.yaml @@ -1,26 +1,5 @@ name: "hermes-2-pro-mistral" -icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png -license: apache-2.0 -description: | - Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house. 
- - This new version of Hermes maintains its excellent general task and conversation capabilities - but also excels at Function Calling, JSON Structured Outputs, and has improved on several other metrics as well, scoring a 90% on our function calling evaluation built in partnership with Fireworks.AI, and an 81% on our structured JSON Output evaluation. - - Hermes Pro takes advantage of a special system prompt and multi-turn function calling structure with a new chatml role in order to make function calling reliable and easy to parse. Learn more about prompting below. - - This work was a collaboration between Nous Research, @interstellarninja, and Fireworks.AI - - Learn more about the function calling on our github repo here: https://github.com/NousResearch/Hermes-Function-Calling/tree/main - -urls: -- https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF - -tags: -- llm -- gguf -- gpu -- cpu config_file: | mmap: true diff --git a/gallery/index.yaml b/gallery/index.yaml index 4582838e..bb1c5250 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -4,6 +4,28 @@ - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" name: "llama3-8b-instruct" + license: llama3 + + description: | + Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Further, in developing these models, we took great care to optimize helpfulness and safety. + + Model developers Meta + + Variations Llama 3 comes in two sizes — 8B and 70B parameters — in pre-trained and instruction tuned variants. + + Input Models input text only. + + Output Models generate text and code only. + + Model Architecture Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. + urls: + - https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct + + tags: + - llm + - gguf + - gpu + - cpu overrides: parameters: model: Meta-Llama-3-8B-Instruct-Q5_K_M.gguf @@ -15,6 +37,20 @@ ### START LLaVa - &llava url: "github:mudler/LocalAI/gallery/llava.yaml@master" + license: apache-2.0 + + description: | + LLaVA represents a novel end-to-end trained large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding, achieving impressive chat capabilities mimicking spirits of the multimodal GPT-4 and setting a new state-of-the-art accuracy on Science QA. + + urls: + - https://llava-vl.github.io/ + + tags: + - llm + - multimodal + - gguf + - gpu + - cpu name: "llava-1.6-vicuna" overrides: mmproj: mmproj-vicuna7b-f16.gguf @@ -52,8 +88,36 @@ sha256: 09c230de47f6f843e4841656f7895cac52c6e7ec7392acb5e8527de8b775c45a uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf ### START Phi-2 -- &phi-2 +- &phi-2-chat url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master" + license: mit + + description: | + Phi-2 fine-tuned by the OpenHermes 2.5 dataset optimised for multi-turn conversation and character impersonation. 
+ + The dataset has been pre-processed by doing the following: + + - remove all refusals + - remove any mention of AI assistant + - split any multi-turn dialog generated in the dataset into multi-turn conversations records + - added nfsw generated conversations from the Teatime dataset + + Developed by: l3utterfly + Funded by: Layla Network + Model type: Phi + Language(s) (NLP): English + License: MIT + Finetuned from model: Phi-2 + + urls: + - https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml + - https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml-gguf + + tags: + - llm + - gguf + - gpu + - cpu name: "phi-2-chat:Q8_0" overrides: parameters: @@ -62,7 +126,7 @@ - filename: "phi-2-layla-v1-chatml-Q8_0.gguf" sha256: "0cf542a127c2c835066a78028009b7eddbaf773cc2a26e1cb157ce5e09c1a2e0" uri: "huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf" -- <<: *phi-2 +- <<: *phi-2-chat name: "phi-2-chat" overrides: parameters: @@ -71,7 +135,22 @@ - filename: "phi-2-layla-v1-chatml-Q4_K.gguf" sha256: "b071e5624b60b8911f77261398802c4b4079c6c689e38e2ce75173ed62bc8a48" uri: "huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q4_K.gguf" -- <<: *phi-2 +- <<: *phi-2-chat + license: mit + icon: "https://huggingface.co/rhysjones/phi-2-orange/resolve/main/phi-2-orange.jpg" + description: | + A two-step finetune of Phi-2, with a bit of zest. + + There is an updated model at rhysjones/phi-2-orange-v2 which has higher evals, if you wish to test. + urls: + - https://huggingface.co/rhysjones/phi-2-orange + - https://huggingface.co/TheBloke/phi-2-orange-GGUF + + tags: + - llm + - gguf + - gpu + - cpu name: "phi-2-orange" overrides: parameters: @@ -84,6 +163,19 @@ - &phi-3 url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master" name: "phi-3-mini-4k-instruct" + license: mit + + description: | + The Phi-3-Mini-4K-Instruct is a 3.8B parameters, lightweight, state-of-the-art open model trained with the Phi-3 datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Phi-3 family with the Mini version in two variants 4K and 128K which is the context length (in tokens) it can support. The model has underwent a post-training process that incorporates both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks testing common sense, language understanding, math, code, long context and logical reasoning, Phi-3 Mini-4K-Instruct showcased a robust and state-of-the-art performance among models with less than 13 billion parameters. + + urls: + - https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf + + tags: + - llm + - gguf + - gpu + - cpu overrides: parameters: model: Phi-3-mini-4k-instruct-q4.gguf @@ -104,6 +196,28 @@ - &hermes-2-pro-mistral url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" name: "hermes-2-pro-mistral" + icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png + license: apache-2.0 + + description: | + Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house. 
+ + This new version of Hermes maintains its excellent general task and conversation capabilities - but also excels at Function Calling, JSON Structured Outputs, and has improved on several other metrics as well, scoring a 90% on our function calling evaluation built in partnership with Fireworks.AI, and an 81% on our structured JSON Output evaluation. + + Hermes Pro takes advantage of a special system prompt and multi-turn function calling structure with a new chatml role in order to make function calling reliable and easy to parse. Learn more about prompting below. + + This work was a collaboration between Nous Research, @interstellarninja, and Fireworks.AI + + Learn more about the function calling on our github repo here: https://github.com/NousResearch/Hermes-Function-Calling/tree/main + + urls: + - https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF + + tags: + - llm + - gguf + - gpu + - cpu overrides: parameters: model: Hermes-2-Pro-Mistral-7B.Q4_0.gguf @@ -135,6 +249,20 @@ - &codellama url: "github:mudler/LocalAI/gallery/codellama.yaml@master" name: "codellama-7b" + license: llama2 + + description: | + Code Llama is a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 34 billion parameters. This model is designed for general code synthesis and understanding. + + urls: + - https://huggingface.co/TheBloke/CodeLlama-7B-GGUF + - https://huggingface.co/meta-llama/CodeLlama-7b-hf + + tags: + - llm + - gguf + - gpu + - cpu overrides: parameters: model: codellama-7b.Q4_0.gguf @@ -145,6 +273,14 @@ ### START Embeddings - &sentencentransformers + description: | + This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar text are closer and can efficiently be found using cosine similarity. + urls: + - https://github.com/UKPLab/sentence-transformers + tags: + - gpu + - cpu + - embeddings name: "all-MiniLM-L6-v2" url: "github:mudler/LocalAI/gallery/sentencetransformers.yaml@master" overrides: @@ -152,8 +288,22 @@ model: all-MiniLM-L6-v2 ### START Image generation -- &diffusers +- &dreamshaper name: dreamshaper + icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png + license: other + + description: | + A text-to-image model that uses Stable Diffusion 1.5 to generate images from text prompts. This model is DreamShaper model by Lykon. 
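Once installed, an entry like this is served through LocalAI's OpenAI-compatible image endpoint. A sketch under the assumption of a local instance on port 8080, with the prompt and size chosen only for illustration:

curl http://localhost:8080/v1/images/generations \
  -H "Content-Type: application/json" \
  -d '{"model": "dreamshaper", "prompt": "a lighthouse on a cliff at sunset", "size": "512x512"}'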
+ + urls: + - https://civitai.com/models/4384/dreamshaper + + tags: + - text-to-image + - stablediffusion + - sd-1.5 + - gpu url: "github:mudler/LocalAI/gallery/dreamshaper.yaml@master" overrides: parameters: @@ -166,32 +316,71 @@ ## Whisper - url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" name: "whisper-1" + license: "MIT" + urls: + - https://github.com/ggerganov/whisper.cpp + - https://huggingface.co/ggerganov/whisper.cpp + + description: | + Port of OpenAI's Whisper model in C/C++ ## Bert embeddings - url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master" name: "bert-embeddings" - -- url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master" - name: "text-embedding-ada-002" - + license: "Apache 2.0" + urls: + - https://huggingface.co/skeskinen/ggml + tags: + - embeddings + description: | + Bert model that can be used for embeddings + ## Stable Diffusion - url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master - name: stablediffusion + license: "BSD-3" + urls: + - https://github.com/EdVince/Stable-Diffusion-NCNN + - https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE + + description: | + Stable Diffusion in NCNN with c++, supported txt2img and img2img + name: stablediffusion-cpp ## Tiny Dream - url: github:mudler/LocalAI/gallery/tinydream.yaml@master name: tinydream - + license: "BSD-3" + urls: + - https://github.com/symisc/tiny-dream + - https://github.com/symisc/tiny-dream/blob/main/LICENSE + + description: | + An embedded, Header Only, Stable Diffusion C++ implementation ## Piper TTS -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- &piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-kathleen-low + icon: https://github.com/rhasspy/piper/raw/master/etc/logo.png + license: mit + + urls: + - https://github.com/rhasspy/piper + + description: | + A fast, local neural text to speech system that sounds great and is optimized for the Raspberry Pi 4. Piper is used in a variety of [projects](https://github.com/rhasspy/piper#people-using-piper). 
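The &piper anchor introduced on this first voice entry is what the rest of this hunk builds on: each following voice switches to a YAML merge key (<<: *piper) that inherits the shared url, icon, license, description, and tags, restating only what differs. A minimal sketch of the pattern, with the entries shortened for illustration:

- &piper                        # first entry defines the anchor and the shared keys
  url: github:mudler/LocalAI/gallery/piper.yaml@master
  name: voice-en-us-kathleen-low
- <<: *piper                    # merge key copies every key from the anchored entry
  name: voice-ca-upc_ona-x-low  # only the differing keys are overridden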
+ + tags: + - tts + - text-to-speech + - cpu + override: parameters: model: en-us-kathleen-low.onnx files: - filename: voice-en-us-kathleen-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-kathleen-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper name: voice-ca-upc_ona-x-low override: parameters: @@ -199,7 +388,8 @@ files: - filename: voice-ca-upc_ona-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ca-upc_ona-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ca-upc_pau-x-low override: parameters: @@ -207,7 +397,8 @@ files: - filename: voice-ca-upc_pau-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ca-upc_pau-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-da-nst_talesyntese-medium override: parameters: @@ -215,7 +406,8 @@ files: - filename: voice-da-nst_talesyntese-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-da-nst_talesyntese-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-eva_k-x-low override: parameters: @@ -223,7 +415,8 @@ files: - filename: voice-de-eva_k-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-eva_k-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-karlsson-low override: parameters: @@ -231,7 +424,8 @@ files: - filename: voice-de-karlsson-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-karlsson-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-kerstin-low override: parameters: @@ -239,7 +433,8 @@ files: - filename: voice-de-kerstin-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-kerstin-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-pavoque-low override: parameters: @@ -247,7 +442,8 @@ files: - filename: voice-de-pavoque-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-pavoque-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-ramona-low override: parameters: @@ -255,7 +451,8 @@ files: - filename: voice-de-ramona-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-ramona-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-thorsten-low override: @@ -264,7 +461,8 @@ files: - filename: voice-de-thorsten-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-thorsten-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-el-gr-rapunzelina-low override: @@ -273,7 +471,8 @@ files: - filename: voice-el-gr-rapunzelina-low.tar.gz uri: 
https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-el-gr-rapunzelina-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-gb-alan-low override: @@ -282,7 +481,8 @@ files: - filename: voice-en-gb-alan-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-gb-alan-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-gb-southern_english_female-low override: @@ -291,7 +491,8 @@ files: - filename: voice-en-gb-southern_english_female-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-gb-southern_english_female-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-amy-low override: @@ -300,7 +501,8 @@ files: - filename: voice-en-us-amy-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-danny-low override: @@ -309,7 +511,8 @@ files: - filename: voice-en-us-danny-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-kathleen-low override: @@ -318,7 +521,8 @@ files: - filename: voice-en-us-kathleen-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-kathleen-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-lessac-low override: @@ -327,7 +531,8 @@ files: - filename: voice-en-us-lessac-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-lessac-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-lessac-medium override: @@ -336,7 +541,8 @@ files: - filename: voice-en-us-lessac-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-lessac-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-libritts-high override: @@ -345,7 +551,8 @@ files: - filename: voice-en-us-libritts-high.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-libritts-high.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-ryan-high override: @@ -354,7 +561,8 @@ files: - filename: voice-en-us-ryan-high.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-high.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-ryan-low override: @@ -364,7 +572,8 @@ - filename: voice-en-us-ryan-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-ryan-medium 
override: @@ -374,7 +583,8 @@ - filename: voice-en-us-ryan-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us_lessac override: parameters: @@ -383,7 +593,8 @@ - filename: voice-en-us_lessac.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us_lessac.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-es-carlfm-x-low override: parameters: @@ -392,7 +603,8 @@ - filename: voice-es-carlfm-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-carlfm-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-es-mls_10246-low override: @@ -402,7 +614,8 @@ - filename: voice-es-mls_10246-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-mls_10246-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-es-mls_9972-low override: @@ -412,7 +625,8 @@ - filename: voice-es-mls_9972-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-mls_9972-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fi-harri-low override: @@ -422,7 +636,8 @@ - filename: voice-fi-harri-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fi-harri-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-gilles-low override: @@ -432,7 +647,8 @@ - filename: voice-fr-gilles-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-gilles-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-mls_1840-low override: @@ -442,7 +658,8 @@ - filename: voice-fr-mls_1840-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-mls_1840-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-siwis-low override: @@ -452,7 +669,8 @@ - filename: voice-fr-siwis-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-siwis-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-siwis-medium override: @@ -462,7 +680,8 @@ - filename: voice-fr-siwis-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-siwis-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-bui-medium override: @@ -472,7 +691,8 @@ - filename: voice-is-bui-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-bui-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-salka-medium override: @@ -482,7 +702,8 @@ - filename: voice-is-salka-medium.tar.gz uri: 
https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-salka-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-steinn-medium override: @@ -492,7 +713,8 @@ - filename: voice-is-steinn-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-steinn-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-ugla-medium override: @@ -502,7 +724,8 @@ - filename: voice-is-ugla-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-ugla-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-it-riccardo_fasol-x-low override: @@ -512,7 +735,8 @@ - filename: voice-it-riccardo_fasol-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-it-riccardo_fasol-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-kk-iseke-x-low override: @@ -522,7 +746,8 @@ - filename: voice-kk-iseke-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-iseke-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-kk-issai-high override: @@ -532,7 +757,8 @@ - filename: voice-kk-issai-high.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-issai-high.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-kk-raya-x-low override: @@ -542,7 +768,8 @@ - filename: voice-kk-raya-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-raya-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ne-google-medium override: @@ -552,7 +779,8 @@ - filename: voice-ne-google-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ne-google-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ne-google-x-low override: @@ -562,7 +790,8 @@ - filename: voice-ne-google-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ne-google-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-mls_5809-low override: @@ -572,7 +801,8 @@ - filename: voice-nl-mls_5809-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-mls_5809-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-mls_7432-low override: @@ -582,7 +812,8 @@ - filename: voice-nl-mls_7432-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-mls_7432-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-nathalie-x-low override: @@ -592,7 +823,8 @@ - filename: voice-nl-nathalie-x-low.tar.gz uri: 
https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-nathalie-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-rdh-medium override: @@ -602,7 +834,8 @@ - filename: voice-nl-rdh-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-rdh-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-rdh-x-low override: @@ -612,7 +845,8 @@ - filename: voice-nl-rdh-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-rdh-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-no-talesyntese-medium override: @@ -622,7 +856,8 @@ - filename: voice-no-talesyntese-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-no-talesyntese-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-pl-mls_6892-low override: @@ -632,7 +867,8 @@ - filename: voice-pl-mls_6892-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-pl-mls_6892-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-pt-br-edresson-low override: @@ -642,7 +878,8 @@ - filename: voice-pt-br-edresson-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-pt-br-edresson-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ru-irinia-medium override: @@ -652,7 +889,8 @@ - filename: voice-ru-irinia-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ru-irinia-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-sv-se-nst-medium override: @@ -662,7 +900,8 @@ - filename: voice-sv-se-nst-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-sv-se-nst-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-uk-lada-x-low override: @@ -672,7 +911,8 @@ - filename: voice-uk-lada-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-uk-lada-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-vi-25hours-single-low override: @@ -682,7 +922,8 @@ - filename: voice-vi-25hours-single-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-vi-25hours-single-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-vi-vivos-x-low override: @@ -692,7 +933,8 @@ - filename: voice-vi-vivos-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-vi-vivos-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-zh-cn-huayan-x-low override: @@ -702,7 +944,8 @@ - filename: voice-zh-cn-huayan-x-low.tar.gz uri: 
https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh-cn-huayan-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-zh_CN-huayan-medium override: diff --git a/gallery/llama3-instruct.yaml b/gallery/llama3-instruct.yaml index 4e29e740..96272c58 100644 --- a/gallery/llama3-instruct.yaml +++ b/gallery/llama3-instruct.yaml @@ -1,26 +1,5 @@ name: "llama3-instruct" -license: llama3 -description: | - Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Further, in developing these models, we took great care to optimize helpfulness and safety. - - Model developers Meta - - Variations Llama 3 comes in two sizes — 8B and 70B parameters — in pre-trained and instruction tuned variants. - - Input Models input text only. - - Output Models generate text and code only. - - Model Architecture Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. -urls: -- https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct - -tags: -- llm -- gguf -- gpu -- cpu config_file: | mmap: true diff --git a/gallery/llava.yaml b/gallery/llava.yaml index 159ae34c..44c1aa97 100644 --- a/gallery/llava.yaml +++ b/gallery/llava.yaml @@ -1,18 +1,5 @@ name: "llava" -license: apache-2.0 -description: | - LLaVA represents a novel end-to-end trained large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding, achieving impressive chat capabilities mimicking spirits of the multimodal GPT-4 and setting a new state-of-the-art accuracy on Science QA. - -urls: -- https://llava-vl.github.io/ - -tags: -- llm -- multimodal -- gguf -- gpu -- cpu config_file: | backend: llama-cpp diff --git a/gallery/phi-2-chat.yaml b/gallery/phi-2-chat.yaml index 3370311f..3fc84d3b 100644 --- a/gallery/phi-2-chat.yaml +++ b/gallery/phi-2-chat.yaml @@ -1,39 +1,8 @@ name: "phi-2-chatml" -license: mit -description: | - Phi-2 fine-tuned by the OpenHermes 2.5 dataset optimised for multi-turn conversation and character impersonation. 
- - The dataset has been pre-processed by doing the following: - - - remove all refusals - - remove any mention of AI assistant - - split any multi-turn dialog generated in the dataset into multi-turn conversations records - - added nfsw generated conversations from the Teatime dataset - - Developed by: l3utterfly - Funded by: Layla Network - Model type: Phi - Language(s) (NLP): English - License: MIT - Finetuned from model: Phi-2 - - -urls: -- https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml -- https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml-gguf - -tags: -- llm -- gguf -- gpu -- cpu config_file: | mmap: true - # parameters: - # model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf - template: chat_message: | <|im_start|>{{ .RoleName }} diff --git a/gallery/phi-2-orange.yaml b/gallery/phi-2-orange.yaml index 9800f8da..645875ad 100644 --- a/gallery/phi-2-orange.yaml +++ b/gallery/phi-2-orange.yaml @@ -1,19 +1,4 @@ name: "phi-2-orange" -license: mit -icon: "https://huggingface.co/rhysjones/phi-2-orange/resolve/main/phi-2-orange.jpg" -description: | - A two-step finetune of Phi-2, with a bit of zest. - - There is an updated model at rhysjones/phi-2-orange-v2 which has higher evals, if you wish to test. -urls: -- https://huggingface.co/rhysjones/phi-2-orange -- https://huggingface.co/TheBloke/phi-2-orange-GGUF - -tags: -- llm -- gguf -- gpu -- cpu config_file: | mmap: true diff --git a/gallery/phi-3-chat.yaml b/gallery/phi-3-chat.yaml index 24dbc20f..b17e5bb4 100644 --- a/gallery/phi-3-chat.yaml +++ b/gallery/phi-3-chat.yaml @@ -1,17 +1,4 @@ name: "phi-3-chat" -license: mit - -description: | - The Phi-3-Mini-4K-Instruct is a 3.8B parameters, lightweight, state-of-the-art open model trained with the Phi-3 datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Phi-3 family with the Mini version in two variants 4K and 128K which is the context length (in tokens) it can support. The model has underwent a post-training process that incorporates both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks testing common sense, language understanding, math, code, long context and logical reasoning, Phi-3 Mini-4K-Instruct showcased a robust and state-of-the-art performance among models with less than 13 billion parameters. - -urls: -- https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf - -tags: -- llm -- gguf -- gpu -- cpu config_file: | mmap: true diff --git a/gallery/piper.yaml b/gallery/piper.yaml index d759ba92..eb1a6ecc 100644 --- a/gallery/piper.yaml +++ b/gallery/piper.yaml @@ -1,15 +1,2 @@ config_file: | backend: piper -icon: https://github.com/rhasspy/piper/raw/master/etc/logo.png -license: mit - -urls: - - https://github.com/rhasspy/piper - -description: | - A fast, local neural text to speech system that sounds great and is optimized for the Raspberry Pi 4. Piper is used in a variety of [projects](https://github.com/rhasspy/piper#people-using-piper). 
- -tags: -- tts -- text-to-speech -- cpu diff --git a/gallery/sentencetransformers.yaml b/gallery/sentencetransformers.yaml index 1830cce3..9ba5d29b 100644 --- a/gallery/sentencetransformers.yaml +++ b/gallery/sentencetransformers.yaml @@ -1,12 +1,4 @@ name: "sentencetransformers" -description: | - This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar text are closer and can efficiently be found using cosine similarity. - urls: - - https://github.com/UKPLab/sentence-transformers -tags: -- gpu -- cpu -- embeddings config_file: | backend: sentencetransformers \ No newline at end of file diff --git a/gallery/stablediffusion.yaml b/gallery/stablediffusion.yaml index c8a0eb8b..9b1cad32 100644 --- a/gallery/stablediffusion.yaml +++ b/gallery/stablediffusion.yaml @@ -1,11 +1,5 @@ name: "stablediffusion-cpp" -license: "BSD-3" -urls: -- https://github.com/EdVince/Stable-Diffusion-NCNN -- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE -description: | - Stable Diffusion in NCNN with c++, supported txt2img and img2img config_file: | name: stablediffusion-cpp backend: stablediffusion diff --git a/gallery/tinydream.yaml b/gallery/tinydream.yaml index 415762de..6e39414c 100644 --- a/gallery/tinydream.yaml +++ b/gallery/tinydream.yaml @@ -1,11 +1,5 @@ name: "tinydream" -license: "BSD-3" -urls: - - https://github.com/symisc/tiny-dream - - https://github.com/symisc/tiny-dream/blob/main/LICENSE -description: | - An embedded, Header Only, Stable Diffusion C++ implementation config_file: | name: tinydream backend: tinydream diff --git a/gallery/whisper-base.yaml b/gallery/whisper-base.yaml index 574dbb13..f654a37c 100644 --- a/gallery/whisper-base.yaml +++ b/gallery/whisper-base.yaml @@ -1,11 +1,5 @@ name: "whisper-base" -license: "MIT" -urls: -- https://github.com/ggerganov/whisper.cpp -- https://huggingface.co/ggerganov/whisper.cpp -description: | - Port of OpenAI's Whisper model in C/C++ config_file: | backend: whisper From a09fe1b9babcfec6e91f596b6597a4030d9552fc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 Apr 2024 20:00:20 +0200 Subject: [PATCH 0355/2895] fix(gallery): set margin for images Signed-off-by: Ettore Di Giacinto --- core/http/elements/gallery.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index 405f42ae..f2b4f8dd 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -209,7 +209,7 @@ func ListModels(models []*gallery.GalleryModel) string { }, elem.Img(attrs.Props{ // "class": "rounded-t-lg object-fit object-center h-96", - "class": "rounded-t-lg max-h-48 max-w-96 object-cover", + "class": "rounded-t-lg max-h-48 max-w-96 object-cover mt-3", "src": m.Icon, }), ), From d2bea6f9e3c30056b5d1adcfc6dd3ff5fae560af Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 Apr 2024 20:01:56 +0200 Subject: [PATCH 0356/2895] fix(gallery): fixup hermes q8 entry Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index bb1c5250..16916703 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -235,7 +235,7 @@ sha256: "40adc3b227bc36764de148fdda4df5df385adc06650d58d4dbe726ee0214eeff" uri: 
"huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf" - <<: *hermes-2-pro-mistral - name: "hermes-2-pro-mistral" + name: "hermes-2-pro-mistral:Q8_0" overrides: parameters: model: Hermes-2-Pro-Mistral-7B.Q8_0.gguf From 34c3f563fd4c50162dc4e64eb4cd9265ac4afb05 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 Apr 2024 20:05:59 +0200 Subject: [PATCH 0357/2895] fix(gallery): fixup dreamshaper icon Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 16916703..deab29cf 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -290,7 +290,7 @@ ### START Image generation - &dreamshaper name: dreamshaper - icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png + icon: https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/dd9b038c-bd15-43ab-86ab-66e145ad7ff2/width=450/26072158-132340247-8k%20portrait%20of%20beautiful%20cyborg%20with%20brown%20hair,%20intricate,%20elegant,%20highly%20detailed,%20majestic,%20digital%20photography,%20art%20by%20artg_ed.jpeg license: other description: | From ac56ac2b2da3bba78122b2e80eb36afc28e51056 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 Apr 2024 20:10:58 +0200 Subject: [PATCH 0358/2895] fix(gallery): show a fake image if no there is no icon (#2111) Signed-off-by: Ettore Di Giacinto --- core/http/elements/gallery.go | 40 ++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index f2b4f8dd..c03750da 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -8,6 +8,10 @@ import ( "github.com/go-skynet/LocalAI/pkg/gallery" ) +const ( + NoImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg" +) + func DoneProgress(uid string) string { return elem.Div( attrs.Props{}, @@ -197,25 +201,27 @@ func ListModels(models []*gallery.GalleryModel) string { elems := []elem.Node{} - if m.Icon != "" { - elems = append(elems, - - elem.Div(attrs.Props{ - "class": "flex justify-center items-center", - }, - elem.A(attrs.Props{ - "href": "#!", - // "class": "justify-center items-center", - }, - elem.Img(attrs.Props{ - // "class": "rounded-t-lg object-fit object-center h-96", - "class": "rounded-t-lg max-h-48 max-w-96 object-cover mt-3", - "src": m.Icon, - }), - ), - )) + if m.Icon == "" { + m.Icon = NoImage } + elems = append(elems, + + elem.Div(attrs.Props{ + "class": "flex justify-center items-center", + }, + elem.A(attrs.Props{ + "href": "#!", + // "class": "justify-center items-center", + }, + elem.Img(attrs.Props{ + // "class": "rounded-t-lg object-fit object-center h-96", + "class": "rounded-t-lg max-h-48 max-w-96 object-cover mt-3", + "src": m.Icon, + }), + ), + )) + elems = append(elems, descriptionDiv(m), actionDiv(m)) modelsElements = append(modelsElements, elem.Div( From f718a391c03c1b1ac870e9a083ca686613bac48f Mon Sep 17 00:00:00 2001 From: fakezeta Date: Wed, 24 Apr 2024 02:45:37 +0200 Subject: [PATCH 0359/2895] fix missing TrustRemoteCode in OpenVINO model load (#2114) --- backend/python/transformers/transformers_server.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index 90053ed5..2f4140c2 100755 --- a/backend/python/transformers/transformers_server.py +++ 
b/backend/python/transformers/transformers_server.py @@ -149,6 +149,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): device_map="CPU" self.model = OVModelForCausalLM.from_pretrained(model_name, compile=True, + trust_remote_code=request.TrustRemoteCode, ov_config={"PERFORMANCE_HINT": "LATENCY"}, device=device_map) self.OV = True From 2fb34b00b5c5daa1b60c46a5b535d30c5acf35fc Mon Sep 17 00:00:00 2001 From: jtwolfe Date: Wed, 24 Apr 2024 17:17:49 +1000 Subject: [PATCH 0360/2895] Incl ocv pkg for diffsusers utils (#2115) * Update diffusers.yml Signed-off-by: jtwolfe * Update diffusers-rocm.yml Signed-off-by: jtwolfe --------- Signed-off-by: jtwolfe --- backend/python/diffusers/diffusers-rocm.yml | 1 + backend/python/diffusers/diffusers.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/backend/python/diffusers/diffusers-rocm.yml b/backend/python/diffusers/diffusers-rocm.yml index f261701d..97b2ce0f 100644 --- a/backend/python/diffusers/diffusers-rocm.yml +++ b/backend/python/diffusers/diffusers-rocm.yml @@ -61,4 +61,5 @@ dependencies: - urllib3==2.0.6 - zipp==3.17.0 - torch + - opencv-python prefix: /opt/conda/envs/diffusers diff --git a/backend/python/diffusers/diffusers.yml b/backend/python/diffusers/diffusers.yml index b1a7d9f9..d5d2913e 100644 --- a/backend/python/diffusers/diffusers.yml +++ b/backend/python/diffusers/diffusers.yml @@ -71,4 +71,5 @@ dependencies: - typing-extensions==4.8.0 - urllib3==2.0.6 - zipp==3.17.0 + - opencv-python prefix: /opt/conda/envs/diffusers From d65214a234d703812dec346be9fb929579382a6b Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 24 Apr 2024 11:11:41 +0200 Subject: [PATCH 0361/2895] :arrow_up: Update docs version mudler/LocalAI (#2113) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index 55eebaeb..6a618115 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "null" + "version": "v2.12.4" } From 4fffc47e77db8a56bbc89fcac57e6c2ca369789e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 24 Apr 2024 18:44:04 +0200 Subject: [PATCH 0362/2895] deps(llama.cpp): update, use better model for function call tests (#2119) deps(llama.cpp): update Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- core/http/app_test.go | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 761c76d6..1923f956 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=b8109bc0139f15a5b321909f47510b89dca47ffc +CPPLLAMA_VERSION?=4e96a812b3ce7322a29a3008db2ed73d9087b176 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all diff --git a/core/http/app_test.go b/core/http/app_test.go index 35e0a8bf..3699c0ed 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -489,11 +489,10 @@ var _ = Describe("API test", func() { if runtime.GOOS != "linux" { Skip("test supported only on linux") } - modelName := "codellama" + + modelName := "hermes-2-pro-mistral" response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - URL: "github:go-skynet/model-gallery/codellama-7b-instruct.yaml", - Name: modelName, - 
Overrides: map[string]interface{}{"backend": "llama", "mmap": true, "f16": true, "context_size": 128}, + ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml", }) Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) @@ -556,7 +555,7 @@ var _ = Describe("API test", func() { var res map[string]string err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res) Expect(err).ToNot(HaveOccurred()) - Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res)) + Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res)) Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason)) }) From 23eac98b3c4aa62f75fc75ddbaf6a1b81326a22f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 24 Apr 2024 19:43:07 +0200 Subject: [PATCH 0363/2895] docs: update hot topics Signed-off-by: Ettore Di Giacinto --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0b32febd..343a7cf5 100644 --- a/README.md +++ b/README.md @@ -50,13 +50,13 @@ [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) +- Reranker API: https://github.com/mudler/LocalAI/pull/2121 +- Gallery WebUI: https://github.com/mudler/LocalAI/pull/2104 - llama3: https://github.com/mudler/LocalAI/discussions/2076 - Parler-TTS: https://github.com/mudler/LocalAI/pull/2027 -- Landing page: https://github.com/mudler/LocalAI/pull/1922 - Openvino support: https://github.com/mudler/LocalAI/pull/1892 - Vector store: https://github.com/mudler/LocalAI/pull/1795 - All-in-one container image: https://github.com/mudler/LocalAI/issues/1855 -- Parallel function calling: https://github.com/mudler/LocalAI/pull/1726 / Tools API support: https://github.com/mudler/LocalAI/pull/1715 Hot topics (looking for contributors): - Backends v2: https://github.com/mudler/LocalAI/issues/1126 From 9dbd217c5972a56563f8a362f82469891349879f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 24 Apr 2024 19:56:51 +0200 Subject: [PATCH 0364/2895] docs(integrations): add Wave terminal Signed-off-by: Ettore Di Giacinto --- docs/content/docs/integrations.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/content/docs/integrations.md b/docs/content/docs/integrations.md index 29f2db17..a7666e77 100644 --- a/docs/content/docs/integrations.md +++ b/docs/content/docs/integrations.md @@ -15,6 +15,7 @@ The list below is a list of software that integrates with LocalAI. - [AnythingLLM](https://github.com/Mintplex-Labs/anything-llm) - [Logseq GPT3 OpenAI plugin](https://github.com/briansunter/logseq-plugin-gpt3-openai) allows to set a base URL, and works with LocalAI. - https://plugins.jetbrains.com/plugin/21056-codegpt allows for custom OpenAI compatible endpoints since 2.4.0 +- [Wave Terminal](https://docs.waveterm.dev/features/supportedLLMs/localai) has native support for LocalAI! 
- https://github.com/longy2k/obsidian-bmo-chatbot - https://github.com/FlowiseAI/Flowise - https://github.com/k8sgpt-ai/k8sgpt From d30280ed23600beb083bc69ca988f7212c1581cd Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 24 Apr 2024 23:55:30 +0200 Subject: [PATCH 0365/2895] :arrow_up: Update ggerganov/whisper.cpp (#2122) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1923f956..c1fe9a48 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=b0c3cbf2e851cf232e432b590dcc514a689ec028 +WHISPER_CPP_VERSION?=858452d58dba3acdc3431c9bced2bb8cfd9bf418 # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From e16658b7ec065d9893202cbf15937140eea8119f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 25 Apr 2024 00:00:17 +0200 Subject: [PATCH 0366/2895] :arrow_up: Update ggerganov/llama.cpp (#2123) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c1fe9a48..662e54bd 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=4e96a812b3ce7322a29a3008db2ed73d9087b176 +CPPLLAMA_VERSION?=784e11dea1f5ce9638851b2b0dddb107e2a609c8 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From b664edde292210d66b5f05c4ac5069d9123d1b38 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 Apr 2024 00:19:02 +0200 Subject: [PATCH 0367/2895] feat(rerankers): Add new backend, support jina rerankers API (#2121) Signed-off-by: Ettore Di Giacinto --- .github/workflows/test-extra.yml | 31 +++++ Dockerfile | 5 +- Makefile | 13 +- aio/cpu/rerank.yaml | 27 ++++ aio/entrypoint.sh | 2 +- aio/gpu-8g/rerank.yaml | 27 ++++ aio/intel/rerank.yaml | 27 ++++ backend/backend.proto | 24 ++++ .../transformers/transformers-nvidia.yml | 2 + .../transformers/transformers-rocm.yml | 2 + .../common-env/transformers/transformers.yml | 4 +- backend/python/rerankers/Makefile | 27 ++++ backend/python/rerankers/README.md | 5 + backend/python/rerankers/reranker.py | 123 ++++++++++++++++++ backend/python/rerankers/run.sh | 14 ++ backend/python/rerankers/test.sh | 11 ++ backend/python/rerankers/test_reranker.py | 90 +++++++++++++ core/backend/rerank.go | 39 ++++++ core/http/app.go | 1 + core/http/endpoints/jina/rerank.go | 84 ++++++++++++ core/http/routes/jina.go | 19 +++ core/schema/jina.go | 34 +++++ pkg/grpc/backend.go | 2 + pkg/grpc/client.go | 16 +++ pkg/grpc/embed.go | 4 + 25 files changed, 628 insertions(+), 5 deletions(-) create mode 100644 aio/cpu/rerank.yaml create mode 100644 aio/gpu-8g/rerank.yaml create mode 100644 aio/intel/rerank.yaml create mode 100644 backend/python/rerankers/Makefile create mode 100644 backend/python/rerankers/README.md create mode 100755 backend/python/rerankers/reranker.py create mode 100755 backend/python/rerankers/run.sh create mode 100755 backend/python/rerankers/test.sh 
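Besides the CI and container wiring, this commit adds a rerank route (core/http/endpoints/jina/rerank.go, core/http/routes/jina.go) modeled on the Jina reranker API. A request sketch: the path and field names follow Jina's rerank API, which this endpoint is stated to support, and the model name is a placeholder for whatever reranker model is configured, not something guaranteed by this patch:

curl http://localhost:8080/v1/rerank \
  -H "Content-Type: application/json" \
  -d '{
    "model": "jina-reranker-v1-base-en",
    "query": "Organic skincare products for sensitive skin",
    "documents": [
      "Eco-friendly kitchenware for modern homes",
      "Organic skincare range for sensitive skin"
    ],
    "top_n": 1
  }'

The response presumably mirrors Jina's shape, a results array of index/relevance_score pairs, via the schema added in core/schema/jina.go.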
create mode 100755 backend/python/rerankers/test_reranker.py create mode 100644 core/backend/rerank.go create mode 100644 core/http/endpoints/jina/rerank.go create mode 100644 core/http/routes/jina.go create mode 100644 core/schema/jina.go diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index fa45cb3c..f9476d4d 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -74,6 +74,37 @@ jobs: make --jobs=5 --output-sync=target -C backend/python/sentencetransformers make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test + + tests-rerankers: + runs-on: ubuntu-latest + steps: + - name: Clone + uses: actions/checkout@v4 + with: + submodules: true + - name: Dependencies + run: | + sudo apt-get update + sudo apt-get install build-essential ffmpeg + curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ + sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ + gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ + sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \ + sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ + sudo apt-get update && \ + sudo apt-get install -y conda + sudo apt-get install -y ca-certificates cmake curl patch python3-pip + sudo apt-get install -y libopencv-dev + pip install --user grpcio-tools + + sudo rm -rfv /usr/bin/conda || true + + - name: Test rerankers + run: | + export PATH=$PATH:/opt/conda/bin + make --jobs=5 --output-sync=target -C backend/python/rerankers + make --jobs=5 --output-sync=target -C backend/python/rerankers test + tests-diffusers: runs-on: ubuntu-latest steps: diff --git a/Dockerfile b/Dockerfile index 4bc8b35e..4d12cb56 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,7 +16,7 @@ ARG TARGETVARIANT ENV BUILD_TYPE=${BUILD_TYPE} ENV DEBIAN_FRONTEND=noninteractive -ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" +ENV 
EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" ARG GO_TAGS="stablediffusion tinydream tts" @@ -259,6 +259,9 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ make -C backend/python/sentencetransformers \ ; fi +RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ + make -C backend/python/rerankers \ + ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ make -C backend/python/transformers \ ; fi diff --git a/Makefile b/Makefile index 662e54bd..b017982e 100644 --- a/Makefile +++ b/Makefile @@ -437,10 +437,10 @@ protogen-go-clean: $(RM) bin/* .PHONY: protogen-python -protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen +protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen .PHONY: protogen-python-clean -protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean +protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean .PHONY: autogptq-protogen autogptq-protogen: @@ -506,6 +506,14 @@ petals-protogen: petals-protogen-clean: $(MAKE) -C backend/python/petals protogen-clean +.PHONY: rerankers-protogen +rerankers-protogen: + $(MAKE) -C backend/python/rerankers protogen + +.PHONY: rerankers-protogen-clean +rerankers-protogen-clean: + $(MAKE) -C backend/python/rerankers protogen-clean + .PHONY: sentencetransformers-protogen sentencetransformers-protogen: $(MAKE) -C backend/python/sentencetransformers protogen @@ -564,6 +572,7 @@ prepare-extra-conda-environments: protogen-python $(MAKE) -C backend/python/vllm $(MAKE) -C backend/python/mamba $(MAKE) -C backend/python/sentencetransformers + $(MAKE) -C backend/python/rerankers $(MAKE) -C backend/python/transformers $(MAKE) -C 
backend/python/transformers-musicgen $(MAKE) -C backend/python/parler-tts diff --git a/aio/cpu/rerank.yaml b/aio/cpu/rerank.yaml new file mode 100644 index 00000000..b84755a8 --- /dev/null +++ b/aio/cpu/rerank.yaml @@ -0,0 +1,27 @@ +name: jina-reranker-v1-base-en +backend: rerankers +parameters: + model: cross-encoder + +usage: | + You can test this model with curl like this: + + curl http://localhost:8080/v1/rerank \ + -H "Content-Type: application/json" \ + -d '{ + "model": "jina-reranker-v1-base-en", + "query": "Organic skincare products for sensitive skin", + "documents": [ + "Eco-friendly kitchenware for modern homes", + "Biodegradable cleaning supplies for eco-conscious consumers", + "Organic cotton baby clothes for sensitive skin", + "Natural organic skincare range for sensitive skin", + "Tech gadgets for smart homes: 2024 edition", + "Sustainable gardening tools and compost solutions", + "Sensitive skin-friendly facial cleansers and toners", + "Organic food wraps and storage solutions", + "All-natural pet food for dogs with allergies", + "Yoga mats made from recycled materials" + ], + "top_n": 3 + }' diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh index 5fd8d9c2..2487e64f 100755 --- a/aio/entrypoint.sh +++ b/aio/entrypoint.sh @@ -129,7 +129,7 @@ detect_gpu detect_gpu_size PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu -export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}" +export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}" check_vars diff --git a/aio/gpu-8g/rerank.yaml b/aio/gpu-8g/rerank.yaml new file mode 100644 index 00000000..b84755a8 --- /dev/null +++ b/aio/gpu-8g/rerank.yaml @@ -0,0 +1,27 @@ +name: jina-reranker-v1-base-en +backend: rerankers +parameters: + model: cross-encoder + +usage: | + You can test this model with curl like this: + + curl http://localhost:8080/v1/rerank \ + -H "Content-Type: application/json" \ + -d '{ + "model": "jina-reranker-v1-base-en", + "query": "Organic skincare products for sensitive skin", + "documents": [ + "Eco-friendly kitchenware for modern homes", + "Biodegradable cleaning supplies for eco-conscious consumers", + "Organic cotton baby clothes for sensitive skin", + "Natural organic skincare range for sensitive skin", + "Tech gadgets for smart homes: 2024 edition", + "Sustainable gardening tools and compost solutions", + "Sensitive skin-friendly facial cleansers and toners", + "Organic food wraps and storage solutions", + "All-natural pet food for dogs with allergies", + "Yoga mats made from recycled materials" + ], + "top_n": 3 + }' diff --git a/aio/intel/rerank.yaml b/aio/intel/rerank.yaml new file mode 100644 index 00000000..b84755a8 --- /dev/null +++ b/aio/intel/rerank.yaml @@ -0,0 +1,27 @@ +name: jina-reranker-v1-base-en +backend: rerankers +parameters: + model: cross-encoder + +usage: | + You can test this model with curl like this: + + curl http://localhost:8080/v1/rerank \ + -H "Content-Type: application/json" \ + -d '{ + "model": "jina-reranker-v1-base-en", + "query": "Organic skincare products for sensitive skin", + "documents": [ + "Eco-friendly kitchenware for modern homes", + "Biodegradable cleaning supplies for eco-conscious consumers", + "Organic cotton baby 
clothes for sensitive skin", + "Natural organic skincare range for sensitive skin", + "Tech gadgets for smart homes: 2024 edition", + "Sustainable gardening tools and compost solutions", + "Sensitive skin-friendly facial cleansers and toners", + "Organic food wraps and storage solutions", + "All-natural pet food for dogs with allergies", + "Yoga mats made from recycled materials" + ], + "top_n": 3 + }'
diff --git a/backend/backend.proto b/backend/backend.proto index ec01e4a7..778a96ff 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -23,6 +23,30 @@ service Backend { rpc StoresDelete(StoresDeleteOptions) returns (Result) {} rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {} rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {} + + rpc Rerank(RerankRequest) returns (RerankResult) {} +} + +message RerankRequest { + string query = 1; + repeated string documents = 2; + int32 top_n = 3; +} + +message RerankResult { + Usage usage = 1; + repeated DocumentResult results = 2; +} + +message Usage { + int32 total_tokens = 1; + int32 prompt_tokens = 2; +} + +message DocumentResult { + int32 index = 1; + string text = 2; + float relevance_score = 3; } message StoresKey {
diff --git a/backend/python/common-env/transformers/transformers-nvidia.yml b/backend/python/common-env/transformers/transformers-nvidia.yml index e12b5dbb..16e494c5 100644 --- a/backend/python/common-env/transformers/transformers-nvidia.yml +++ b/backend/python/common-env/transformers/transformers-nvidia.yml @@ -120,4 +120,6 @@ dependencies: - transformers>=4.38.2 # Updated Version - transformers_stream_generator==0.0.5 - xformers==0.0.23.post1 + - rerankers[transformers] + - pydantic prefix: /opt/conda/envs/transformers
diff --git a/backend/python/common-env/transformers/transformers-rocm.yml b/backend/python/common-env/transformers/transformers-rocm.yml index 48fac8bf..cdefcc27 100644 --- a/backend/python/common-env/transformers/transformers-rocm.yml +++ b/backend/python/common-env/transformers/transformers-rocm.yml @@ -108,4 +108,6 @@ dependencies: - transformers>=4.38.2 # Updated Version - transformers_stream_generator==0.0.5 - xformers==0.0.23.post1 + - rerankers[transformers] + - pydantic prefix: /opt/conda/envs/transformers
diff --git a/backend/python/common-env/transformers/transformers.yml b/backend/python/common-env/transformers/transformers.yml index 843b13fa..5c069dd0 100644 --- a/backend/python/common-env/transformers/transformers.yml +++ b/backend/python/common-env/transformers/transformers.yml @@ -111,5 +111,7 @@ dependencies: - vllm>=0.4.0 - transformers>=4.38.2 # Updated Version - transformers_stream_generator==0.0.5 - - xformers==0.0.23.post1 + - xformers==0.0.23.post1 + - rerankers[transformers] + - pydantic prefix: /opt/conda/envs/transformers
diff --git a/backend/python/rerankers/Makefile b/backend/python/rerankers/Makefile new file mode 100644 index 00000000..f029c841 --- /dev/null +++ b/backend/python/rerankers/Makefile @@ -0,0 +1,27 @@ +.PHONY: rerankers +rerankers: protogen + $(MAKE) -C ../common-env/transformers + + +.PHONY: run +run: protogen + @echo "Running rerankers..." + bash run.sh + @echo "rerankers run." + +# Running this from the command line does not work well; it only works from an IDE like VSCode. +.PHONY: test +test: protogen + @echo "Testing rerankers..." + bash test.sh + @echo "rerankers tested." 
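# Note: the protogen targets that follow regenerate backend_pb2.py and backend_pb2_grpc.py
# from backend/backend.proto; reranker.py and test_reranker.py import these generated modules.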
+ +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file
diff --git a/backend/python/rerankers/README.md b/backend/python/rerankers/README.md new file mode 100644 index 00000000..9e73ba0a --- /dev/null +++ b/backend/python/rerankers/README.md @@ -0,0 +1,5 @@ +# Creating a separate environment for the reranker project + +``` +make rerankers +``` \ No newline at end of file
diff --git a/backend/python/rerankers/reranker.py b/backend/python/rerankers/reranker.py new file mode 100755 index 00000000..e1974ad5 --- /dev/null +++ b/backend/python/rerankers/reranker.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +""" +Extra gRPC server for Rerankers models. +""" +from concurrent import futures + +import argparse +import signal +import sys +import os + +import time +import backend_pb2 +import backend_pb2_grpc + +import grpc + +from rerankers import Reranker + +_ONE_DAY_IN_SECONDS = 60 * 60 * 24 + +# If MAX_WORKERS is specified in the environment use it, otherwise default to 1 +MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) + +# Implement the BackendServicer class with the service methods +class BackendServicer(backend_pb2_grpc.BackendServicer): + """ + A gRPC servicer for the backend service. + + This class implements the gRPC methods for the backend service, including Health, LoadModel, and Rerank. + """ + def Health(self, request, context): + """ + A gRPC method that returns the health status of the backend service. + + Args: + request: A HealthRequest object that contains the request parameters. + context: A grpc.ServicerContext object that provides information about the RPC. + + Returns: + A Reply object that contains the health status of the backend service. + """ + return backend_pb2.Reply(message=bytes("OK", 'utf-8')) + + def LoadModel(self, request, context): + """ + A gRPC method that loads a model into memory. + + Args: + request: A LoadModelRequest object that contains the request parameters. + context: A grpc.ServicerContext object that provides information about the RPC. + + Returns: + A Result object that contains the result of the LoadModel operation.
+ """ + model_name = request.Model + try: + kwargs = {} + if request.Type != "": + kwargs['model_type'] = request.Type + if request.PipelineType != "": # Reuse the PipelineType field for language + kwargs['lang'] = request.PipelineType + self.model_name = model_name + self.model = Reranker(model_name, **kwargs) + except Exception as err: + return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") + + # Implement your logic here for the LoadModel service + # Replace this with your desired response + return backend_pb2.Result(message="Model loaded successfully", success=True) + + def Rerank(self, request, context): + documents = [] + for idx, doc in enumerate(request.documents): + documents.append(doc) + ranked_results=self.model.rank(query=request.query, docs=documents, doc_ids=list(range(len(request.documents)))) + # Prepare results to return + results = [ + backend_pb2.DocumentResult( + index=res.doc_id, + text=res.text, + relevance_score=res.score + ) for res in ranked_results.results + ] + + # Calculate the usage and total tokens + # TODO: Implement the usage calculation with reranker + total_tokens = sum(len(doc.split()) for doc in request.documents) + len(request.query.split()) + prompt_tokens = len(request.query.split()) + usage = backend_pb2.Usage(total_tokens=total_tokens, prompt_tokens=prompt_tokens) + return backend_pb2.RerankResult(usage=usage, results=results) + +def serve(address): + server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) + backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) + server.add_insecure_port(address) + server.start() + print("Server started. Listening on: " + address, file=sys.stderr) + + # Define the signal handler function + def signal_handler(sig, frame): + print("Received termination signal. Shutting down...") + server.stop(0) + sys.exit(0) + + # Set the signal handlers for SIGINT and SIGTERM + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGTERM, signal_handler) + + try: + while True: + time.sleep(_ONE_DAY_IN_SECONDS) + except KeyboardInterrupt: + server.stop(0) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Run the gRPC server.") + parser.add_argument( + "--addr", default="localhost:50051", help="The address to bind the server to." 
) + args = parser.parse_args() + + serve(args.addr)
diff --git a/backend/python/rerankers/run.sh b/backend/python/rerankers/run.sh new file mode 100755 index 00000000..16d8a0bd --- /dev/null +++ b/backend/python/rerankers/run.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +## +## A bash script wrapper that runs the reranker server with conda + +export PATH=$PATH:/opt/conda/bin + +# Activate conda environment +source activate transformers + +# get the directory where the bash script is located +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +python $DIR/reranker.py $@
diff --git a/backend/python/rerankers/test.sh b/backend/python/rerankers/test.sh new file mode 100755 index 00000000..75316829 --- /dev/null +++ b/backend/python/rerankers/test.sh @@ -0,0 +1,11 @@ +#!/bin/bash +## +## A bash script wrapper that runs the reranker tests with conda + +# Activate conda environment +source activate transformers + +# get the directory where the bash script is located +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +python -m unittest $DIR/test_reranker.py \ No newline at end of file
diff --git a/backend/python/rerankers/test_reranker.py b/backend/python/rerankers/test_reranker.py new file mode 100755 index 00000000..c1cf3d70 --- /dev/null +++ b/backend/python/rerankers/test_reranker.py @@ -0,0 +1,90 @@ +""" +A test script to test the gRPC service +""" +import unittest +import subprocess +import time +import backend_pb2 +import backend_pb2_grpc + +import grpc + + +class TestBackendServicer(unittest.TestCase): + """ + TestBackendServicer is the class that tests the gRPC service + """ + def setUp(self): + """ + This method sets up the gRPC service by starting the server + """ + self.service = subprocess.Popen(["python3", "reranker.py", "--addr", "localhost:50051"]) + time.sleep(10) + + def tearDown(self) -> None: + """ + This method tears down the gRPC service by terminating the server + """ + self.service.kill() + self.service.wait() + + def test_server_startup(self): + """ + This method tests if the server starts up successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.Health(backend_pb2.HealthMessage()) + self.assertEqual(response.message, b'OK') + except Exception as err: + print(err) + self.fail("Server failed to start") + finally: + self.tearDown() + + def test_load_model(self): + """ + This method tests if the model is loaded successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder")) + self.assertTrue(response.success) + self.assertEqual(response.message, "Model loaded successfully") + except Exception as err: + print(err) + self.fail("LoadModel service failed") + finally: + self.tearDown() + + def test_rerank(self): + """ + This method tests if documents are reranked successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + request = backend_pb2.RerankRequest( + query="I love you", + documents=["I hate you", "I really like you"], + top_n=2 + ) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder")) + self.assertTrue(response.success) + + rerank_response = stub.Rerank(request) + print(rerank_response.results[0]) + self.assertIsNotNone(rerank_response.results) + 
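# The reranker should order results by relevance: for the query "I love you",
# "I really like you" must outrank "I hate you" and come back first.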
self.assertEqual(len(rerank_response.results), 2) + self.assertEqual(rerank_response.results[0].text, "I really like you") + self.assertEqual(rerank_response.results[1].text, "I hate you") + except Exception as err: + print(err) + self.fail("Reranker service failed") + finally: + self.tearDown() \ No newline at end of file diff --git a/core/backend/rerank.go b/core/backend/rerank.go new file mode 100644 index 00000000..810223aa --- /dev/null +++ b/core/backend/rerank.go @@ -0,0 +1,39 @@ +package backend + +import ( + "context" + "fmt" + + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/pkg/grpc/proto" + model "github.com/go-skynet/LocalAI/pkg/model" +) + +func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) { + bb := backend + if bb == "" { + return nil, fmt.Errorf("backend is required") + } + + grpcOpts := gRPCModelOpts(backendConfig) + + opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{ + model.WithBackendString(bb), + model.WithModel(modelFile), + model.WithContext(appConfig.Context), + model.WithAssetDir(appConfig.AssetsDestination), + model.WithLoadGRPCLoadModelOpts(grpcOpts), + }) + rerankModel, err := loader.BackendLoader(opts...) + if err != nil { + return nil, err + } + + if rerankModel == nil { + return nil, fmt.Errorf("could not load rerank model") + } + + res, err := rerankModel.Rerank(context.Background(), request) + + return res, err +} diff --git a/core/http/app.go b/core/http/app.go index 21652dd9..93eb0e20 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -194,6 +194,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth) routes.RegisterPagesRoutes(app, cl, ml, appConfig, auth) routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth) + routes.RegisterJINARoutes(app, cl, ml, appConfig, auth) // Define a custom 404 handler // Note: keep this at the bottom! 
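As a quick illustration of the endpoint wired up below, here is a minimal Go client sketch — not part of this patch — assuming a LocalAI instance on localhost:8080 that serves the jina-reranker-v1-base-en model from the aio configs above; the request and response shapes mirror core/schema/jina.go:

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

// Request/response shapes mirror core/schema/jina.go introduced by this patch.
type rerankRequest struct {
	Model     string   `json:"model"`
	Query     string   `json:"query"`
	Documents []string `json:"documents"`
	TopN      int      `json:"top_n"`
}

type rerankResponse struct {
	Model   string `json:"model"`
	Results []struct {
		Index    int `json:"index"`
		Document struct {
			Text string `json:"text"`
		} `json:"document"`
		RelevanceScore float64 `json:"relevance_score"`
	} `json:"results"`
}

func main() {
	// Build a request against the jina-reranker model shipped in the aio profiles.
	body, err := json.Marshal(rerankRequest{
		Model: "jina-reranker-v1-base-en",
		Query: "Organic skincare products for sensitive skin",
		Documents: []string{
			"Eco-friendly kitchenware for modern homes",
			"Natural organic skincare range for sensitive skin",
			"Yoga mats made from recycled materials",
		},
		TopN: 2,
	})
	if err != nil {
		log.Fatal(err)
	}

	resp, err := http.Post("http://localhost:8080/v1/rerank", "application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	var out rerankResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		log.Fatal(err)
	}

	// Print the documents in the order the backend ranked them (highest score first).
	for _, r := range out.Results {
		fmt.Printf("%.3f  %s\n", r.RelevanceScore, r.Document.Text)
	}
}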
diff --git a/core/http/endpoints/jina/rerank.go b/core/http/endpoints/jina/rerank.go new file mode 100644 index 00000000..bf99367e --- /dev/null +++ b/core/http/endpoints/jina/rerank.go @@ -0,0 +1,84 @@ +package jina + +import ( + "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" + + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/grpc/proto" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" + "github.com/rs/zerolog/log" +) + +func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + req := new(schema.JINARerankRequest) + if err := c.BodyParser(req); err != nil { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{ + "error": "Cannot parse JSON", + }) + } + + input := new(schema.TTSRequest) + + // Parse the body again; the TTSRequest schema is reused here only for its Model and Backend fields + if err := c.BodyParser(input); err != nil { + return err + } + + modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false) + if err != nil { + modelFile = input.Model + log.Warn().Msgf("Model not found in context: %s", input.Model) + } + + cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, + config.LoadOptionDebug(appConfig.Debug), + config.LoadOptionThreads(appConfig.Threads), + config.LoadOptionContextSize(appConfig.ContextSize), + config.LoadOptionF16(appConfig.F16), + ) + + if err != nil { + log.Warn().Msgf("Could not load config for model %s: %v", modelFile, err) + return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{ + "error": "Cannot load model configuration", + }) + } + modelFile = cfg.Model + log.Debug().Msgf("Request for model: %s", modelFile) + + if input.Backend != "" { + cfg.Backend = input.Backend + } + + request := &proto.RerankRequest{ + Query: req.Query, + TopN: int32(req.TopN), + Documents: req.Documents, + } + + results, err := backend.Rerank(cfg.Backend, modelFile, request, ml, appConfig, *cfg) + if err != nil { + return err + } + + response := &schema.JINARerankResponse{ + Model: req.Model, + } + + for _, r := range results.Results { + response.Results = append(response.Results, schema.JINADocumentResult{ + Index: int(r.Index), + Document: schema.JINAText{Text: r.Text}, + RelevanceScore: float64(r.RelevanceScore), + }) + } + + response.Usage.TotalTokens = int(results.Usage.TotalTokens) + response.Usage.PromptTokens = int(results.Usage.PromptTokens) + + return c.Status(fiber.StatusOK).JSON(response) + } +}
diff --git a/core/http/routes/jina.go b/core/http/routes/jina.go new file mode 100644 index 00000000..9c32c72b --- /dev/null +++ b/core/http/routes/jina.go @@ -0,0 +1,19 @@ +package routes + +import ( + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/http/endpoints/jina" + + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" +) + +func RegisterJINARoutes(app *fiber.App, + cl *config.BackendConfigLoader, + ml *model.ModelLoader, + appConfig *config.ApplicationConfig, + auth func(*fiber.Ctx) error) { + + // POST endpoint mimicking the JINA reranking API + app.Post("/v1/rerank", jina.JINARerankEndpoint(cl, ml, appConfig)) +}
diff --git a/core/schema/jina.go b/core/schema/jina.go new file mode 100644 index 00000000..7f80689c --- /dev/null +++ b/core/schema/jina.go @@ -0,0 +1,34 @@ +package schema + +// RerankRequest defines the structure of the request payload +type JINARerankRequest struct { + Model string `json:"model"` + Query string `json:"query"` + 
Documents []string `json:"documents"` + TopN int `json:"top_n"` +} + +// DocumentResult represents a single document result +type JINADocumentResult struct { + Index int `json:"index"` + Document JINAText `json:"document"` + RelevanceScore float64 `json:"relevance_score"` +} + +// Text holds the text of the document +type JINAText struct { + Text string `json:"text"` +} + +// RerankResponse defines the structure of the response payload +type JINARerankResponse struct { + Model string `json:"model"` + Usage JINAUsageInfo `json:"usage"` + Results []JINADocumentResult `json:"results"` +} + +// UsageInfo holds information about usage of tokens +type JINAUsageInfo struct { + TotalTokens int `json:"total_tokens"` + PromptTokens int `json:"prompt_tokens"` +} diff --git a/pkg/grpc/backend.go b/pkg/grpc/backend.go index 8fb8c39d..bef9e186 100644 --- a/pkg/grpc/backend.go +++ b/pkg/grpc/backend.go @@ -49,4 +49,6 @@ type Backend interface { StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, opts ...grpc.CallOption) (*pb.Result, error) StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error) StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error) + + Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error) } diff --git a/pkg/grpc/client.go b/pkg/grpc/client.go index 882db12a..fc4a12fa 100644 --- a/pkg/grpc/client.go +++ b/pkg/grpc/client.go @@ -355,3 +355,19 @@ func (c *Client) StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts client := pb.NewBackendClient(conn) return client.StoresFind(ctx, in, opts...) } + +func (c *Client) Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error) { + if !c.parallel { + c.opMutex.Lock() + defer c.opMutex.Unlock() + } + c.setBusy(true) + defer c.setBusy(false) + conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + return nil, err + } + defer conn.Close() + client := pb.NewBackendClient(conn) + return client.Rerank(ctx, in, opts...) 
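// Note: as with the other helpers in this file, each call honors the parallel/busy
// bookkeeping above and dials a fresh connection that is closed when the call returns.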
+} diff --git a/pkg/grpc/embed.go b/pkg/grpc/embed.go index 73b185a3..694e83b0 100644 --- a/pkg/grpc/embed.go +++ b/pkg/grpc/embed.go @@ -101,6 +101,10 @@ func (e *embedBackend) StoresFind(ctx context.Context, in *pb.StoresFindOptions, return e.s.StoresFind(ctx, in) } +func (e *embedBackend) Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error) { + return e.s.Rerank(ctx, in) +} + type embedBackendServerStream struct { ctx context.Context fn func(s []byte) From 48d0aa2f6da0b1c039fa062e61facf5e6191420e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 Apr 2024 01:28:02 +0200 Subject: [PATCH 0368/2895] models(gallery): add new models to the gallery (#2124) * models: add reranker and parler-tts-mini Signed-off-by: Ettore Di Giacinto * fix: chatml im_end should not have a newline Signed-off-by: Ettore Di Giacinto * models(noromaid): add Signed-off-by: Ettore Di Giacinto * models(llama3): add 70b, add dolphin2.9 Signed-off-by: Ettore Di Giacinto * models(llama3): add unholy-8b Signed-off-by: Ettore Di Giacinto * models(llama3): add therapyllama3, aura Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- aio/cpu/text-to-text.yaml | 6 +- aio/gpu-8g/text-to-text.yaml | 6 +- aio/intel/text-to-text.yaml | 6 +- embedded/models/hermes-2-pro-mistral.yaml | 6 +- gallery/hermes-2-pro-mistral.yaml | 9 +- gallery/index.yaml | 205 +++++++++++++++++++++- gallery/noromaid.yaml | 53 ++++++ gallery/parler-tts.yaml | 2 + gallery/rerankers.yaml | 2 + pkg/model/loader_test.go | 11 +- 10 files changed, 272 insertions(+), 34 deletions(-) create mode 100644 gallery/noromaid.yaml create mode 100644 gallery/parler-tts.yaml create mode 100644 gallery/rerankers.yaml diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml index cf18f659..f2f6aeb4 100644 --- a/aio/cpu/text-to-text.yaml +++ b/aio/cpu/text-to-text.yaml @@ -21,8 +21,7 @@ template: {{- else if eq .RoleName "tool" }} - {{- end }} - <|im_end|> + {{- end }}<|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | <|im_start|>system @@ -37,8 +36,7 @@ template: For each function call return a json object with function name and arguments within XML tags as follows: {'arguments': , 'name': } - - <|im_end|> + <|im_end|> {{.Input -}} <|im_start|>assistant diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml index 0407bb22..dc620a13 100644 --- a/aio/gpu-8g/text-to-text.yaml +++ b/aio/gpu-8g/text-to-text.yaml @@ -21,8 +21,7 @@ template: {{- else if eq .RoleName "tool" }} - {{- end }} - <|im_end|> + {{- end }}<|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | <|im_start|>system @@ -37,8 +36,7 @@ template: For each function call return a json object with function name and arguments within XML tags as follows: {'arguments': , 'name': } - - <|im_end|> + <|im_end|> {{.Input -}} <|im_start|>assistant diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml index f5f93c14..bd6b87ba 100644 --- a/aio/intel/text-to-text.yaml +++ b/aio/intel/text-to-text.yaml @@ -22,8 +22,7 @@ template: {{- else if eq .RoleName "tool" }} - {{- end }} - <|im_end|> + {{- end }}<|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | <|im_start|>system @@ -38,8 +37,7 @@ template: For each function call return a json object with function name and arguments within 
XML tags as follows: {'arguments': , 'name': } - - <|im_end|> + <|im_end|> {{.Input -}} <|im_start|>assistant diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml index dd18ce6f..74d98eeb 100644 --- a/embedded/models/hermes-2-pro-mistral.yaml +++ b/embedded/models/hermes-2-pro-mistral.yaml @@ -21,8 +21,7 @@ template: {{- else if eq .RoleName "tool" }} - {{- end }} - <|im_end|> + {{- end }}<|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | <|im_start|>system @@ -37,8 +36,7 @@ template: For each function call return a json object with function name and arguments within XML tags as follows: {'arguments': , 'name': } - - <|im_end|> + <|im_end|> {{.Input -}} <|im_start|>assistant diff --git a/gallery/hermes-2-pro-mistral.yaml b/gallery/hermes-2-pro-mistral.yaml index d4771a11..b1dc0ff1 100644 --- a/gallery/hermes-2-pro-mistral.yaml +++ b/gallery/hermes-2-pro-mistral.yaml @@ -3,9 +3,6 @@ name: "hermes-2-pro-mistral" config_file: | mmap: true - parameters: - model: Hermes-2-Pro-Mistral-7B.Q6_K.gguf - template: chat_message: | <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} @@ -24,8 +21,7 @@ config_file: | {{- else if eq .RoleName "tool" }} - {{- end }} - <|im_end|> + {{- end }}<|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | <|im_start|>system @@ -40,8 +36,7 @@ config_file: | For each function call return a json object with function name and arguments within XML tags as follows: {'arguments': , 'name': } - - <|im_end|> + <|im_end|> {{.Input -}} <|im_start|>assistant diff --git a/gallery/index.yaml b/gallery/index.yaml index deab29cf..a5de760d 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1,5 +1,35 @@ -## LLM +### START parler-tts +- &parler-tts + url: "github:mudler/LocalAI/gallery/parler-tts.yaml@master" + name: parler-tts-mini-v0.1 + parameters: + model: parler-tts/parler_tts_mini_v0.1 + license: apache-2.0 + description: | + Parler-TTS is a lightweight text-to-speech (TTS) model that can generate high-quality, natural sounding speech in the style of a given speaker (gender, pitch, speaking style, etc). It is a reproduction of work from the paper Natural language guidance of high-fidelity text-to-speech with synthetic annotations by Dan Lyth and Simon King, from Stability AI and Edinburgh University respectively. + urls: + - https://github.com/huggingface/parler-tts + tags: + - tts + - gpu + - cpu + - text-to-speech + - python +### START rerankers +- &rerankers + url: "github:mudler/LocalAI/gallery/rerankers.yaml@master" + name: cross-encoder + parameters: + model: cross-encoder + license: apache-2.0 + description: | + A cross-encoder model that can be used for reranking + tags: + - reranker + - gpu + - python +## LLMs ### START LLAMA3 - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" @@ -20,20 +50,177 @@ Model Architecture Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. 
urls: - https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct - + - https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF tags: - llm - gguf - gpu - cpu + - llama3 overrides: parameters: model: Meta-Llama-3-8B-Instruct-Q5_K_M.gguf files: - - filename: vicuna-7b-q5_k.gguf - sha256: cce3ba85525027d0fff520cad053d5a6f32c293382a40b3d55a650282c267787 - uri: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf + - filename: Meta-Llama-3-8B-Instruct.Q4_0.gguf + sha256: 19ded996fe6c60254dc7544d782276eff41046ed42aa5f2d0005dc457e5c0895 + uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q4_0.gguf +- <<: *llama3 + name: "llama3-8b-instruct:Q6_K" + overrides: + parameters: + model: Meta-Llama-3-8B-Instruct.Q6_K.gguf + files: + - filename: Meta-Llama-3-8B-Instruct.Q6_K.gguf + sha256: b7bad45618e2a76cc1e89a0fbb93a2cac9bf410e27a619c8024ed6db53aa9b4a + uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q6_K.gguf +- <<: *llama3 + name: "llama3-70b-instruct" + overrides: + parameters: + model: Meta-Llama-3-70B-Instruct.Q4_K_M.gguf + files: + - filename: Meta-Llama-3-70B-Instruct.Q4_K_M.gguf + sha256: d559de8dd806a76dbd29f8d8bd04666f2b29e7c7872d8e8481abd07805884d72 + uri: huggingface://MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct.Q4_K_M.gguf +- <<: *llama3 + name: "llama-3-unholy-8b" + urls: + - https://huggingface.co/Undi95/Llama-3-Unholy-8B-GGUF + icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/JmdBlOHlBHVmX1IbZzWSv.png + description: | + Use at your own risk, I'm not responsible for any usage of this model, don't try to do anything this model tell you to do. + Basic uncensoring, this model is epoch 3 out of 4 (but it seem enough at 3). + + If you are censored, it's maybe because of keyword like "assistant", "Factual answer", or other "sweet words" like I call them. + overrides: + parameters: + model: Llama-3-Unholy-8B.q4_k_m.gguf + files: + - filename: Llama-3-Unholy-8B.q4_k_m.gguf + sha256: 17b7f716bce1b34d4aa99ee730a19a834f8c77ddb36090dde5a1eda963f93602 + uri: huggingface://Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q4_k_m.gguf +- <<: *llama3 + name: "llama-3-unholy-8b:Q8_0" + urls: + - https://huggingface.co/Undi95/Llama-3-Unholy-8B-GGUF + icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/JmdBlOHlBHVmX1IbZzWSv.png + description: | + Use at your own risk, I'm not responsible for any usage of this model, don't try to do anything this model tell you to do. + + Basic uncensoring, this model is epoch 3 out of 4 (but it seem enough at 3). + + If you are censored, it's maybe because of keyword like "assistant", "Factual answer", or other "sweet words" like I call them. + overrides: + parameters: + model: Llama-3-Unholy-8B.q8_0.gguf + files: + - filename: Llama-3-Unholy-8B.q8_0.gguf + sha256: 8d4137018acdcd57df4beccc84d9ad3f7f08cac50588f76370afc16c85752702 + uri: huggingface://Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q8_0.gguf +- <<: *llama3 + name: "therapyllama-8b-v1" + urls: + - https://huggingface.co/victunes/TherapyLlama-8B-v1-GGUF + icon: https://cdn-uploads.huggingface.co/production/uploads/65f07d05279d2d8f725bf0c3/A-ckcZ9H0Ee1n_ls2FM41.png + description: | + Trained on Llama 3 8B using a modified version of jerryjalapeno/nart-100k-synthetic. 
+ + It is a Llama 3 version of https://huggingface.co/victunes/TherapyBeagle-11B-v2 + + TherapyLlama is hopefully aligned to be helpful, healthy, and comforting. + Usage: + Do not hold back on Buddy. + Open up to Buddy. + Pour your heart out to Buddy. + Engage with Buddy. + Remember that Buddy is just an AI. + Notes: + + Tested with the Llama 3 Format + You might be assigned a random name if you don't give yourself one. + Chat format was pretty stale? + + Disclaimer + + TherapyLlama is NOT a real therapist. It is a friendly AI that mimics empathy and psychotherapy. It is an illusion without the slightest clue who you are as a person. As much as it can help you with self-discovery, A LLAMA IS NOT A SUBSTITUTE to a real professional. + overrides: + parameters: + model: TherapyLlama-8B-v1-Q4_K_M.gguf + files: + - filename: TherapyLlama-8B-v1-Q4_K_M.gguf + sha256: 3d5a16d458e074a7bc7e706a493d8e95e8a7b2cb16934c851aece0af9d1da14a + uri: huggingface://victunes/TherapyLlama-8B-v1-GGUF/TherapyLlama-8B-v1-Q4_K_M.gguf +- <<: *llama3 + name: "aura-uncensored-l3-8b-iq-imatrix" + urls: + - https://huggingface.co/Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix + icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/oiYHWIEHqmgUkY0GsVdDx.png + description: | + This is another better atempt at a less censored Llama-3 with hopefully more stable formatting. + overrides: + parameters: + model: Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf + files: + - filename: Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf + sha256: 265ded6a4f439bec160f394e3083a4a20e32ebb9d1d2d85196aaab23dab87fb2 + uri: huggingface://Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix/Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf +- &dolphin + name: "dolphin-2.9-llama3-8b" + url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" + urls: + - https://huggingface.co/cognitivecomputations/dolphin-2.9-llama3-8b-gguf + tags: + - llm + - gguf + - gpu + - cpu + - llama3 + license: llama3 + description: | + Dolphin-2.9 has a variety of instruction, conversational, and coding skills. It also has initial agentic abilities and supports function calling. + Dolphin is uncensored. 
+ Curated and trained by Eric Hartford, Lucas Atkins, and Fernando Fernandes, and Cognitive Computations + icon: https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/ldkN1J0WIDQwU4vutGYiD.png + overrides: + parameters: + model: dolphin-2.9-llama3-8b-q4_K_M.gguf + files: + - filename: dolphin-2.9-llama3-8b-q4_K_M.gguf + sha256: be988199ce28458e97205b11ae9d9cf4e3d8e18ff4c784e75bfc12f54407f1a1 + uri: huggingface://cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q4_K_M.gguf +- <<: *dolphin + name: "dolphin-2.9-llama3-8b:Q6_K" + overrides: + parameters: + model: dolphin-2.9-llama3-8b-q6_K.gguf + files: + - filename: dolphin-2.9-llama3-8b-q6_K.gguf + sha256: 8aac72a0bd72c075ba7be1aa29945e47b07d39cd16be9a80933935f51b57fb32 + uri: huggingface://cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q6_K.gguf +## LLama2 and derivatives + +### Start noromaid +- &noromaid + url: "github:mudler/LocalAI/gallery/noromaid.yaml@master" + name: "noromaid-13b-0.4-DPO" + icon: https://cdn-uploads.huggingface.co/production/uploads/630dfb008df86f1e5becadc3/VKX2Z2yjZX5J8kXzgeCYO.png + license: cc-by-nc-4.0 + urls: + - https://huggingface.co/NeverSleep/Noromaid-13B-0.4-DPO-GGUF + tags: + - llm + - llama2 + - gguf + - gpu + - cpu + overrides: + parameters: + model: Noromaid-13B-0.4-DPO.q4_k_m.gguf + files: + - filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf + sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1 + uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf ### START LLaVa - &llava url: "github:mudler/LocalAI/gallery/llava.yaml@master" @@ -50,6 +237,7 @@ - multimodal - gguf - gpu + - llama2 - cpu name: "llava-1.6-vicuna" overrides: @@ -117,6 +305,7 @@ - llm - gguf - gpu + - llama2 - cpu name: "phi-2-chat:Q8_0" overrides: @@ -149,6 +338,7 @@ tags: - llm - gguf + - llama2 - gpu - cpu name: "phi-2-orange" @@ -175,6 +365,7 @@ - llm - gguf - gpu + - llama2 - cpu overrides: parameters: @@ -217,6 +408,7 @@ - llm - gguf - gpu + - llama2 - cpu overrides: parameters: @@ -262,6 +454,7 @@ - llm - gguf - gpu + - llama2 - cpu overrides: parameters: @@ -281,6 +474,7 @@ - gpu - cpu - embeddings + - python name: "all-MiniLM-L6-v2" url: "github:mudler/LocalAI/gallery/sentencetransformers.yaml@master" overrides: @@ -302,6 +496,7 @@ tags: - text-to-image - stablediffusion + - python - sd-1.5 - gpu url: "github:mudler/LocalAI/gallery/dreamshaper.yaml@master" diff --git a/gallery/noromaid.yaml b/gallery/noromaid.yaml new file mode 100644 index 00000000..0b9badfe --- /dev/null +++ b/gallery/noromaid.yaml @@ -0,0 +1,53 @@ +config_file: | + mmap: true + backend: llama-cpp + template: + chat_message: | + <|im_{{if eq .RoleName "assistant"}}bot{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}|> + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + + {{- end }} + {{- if .Content}} + {{.Content }} + {{- end }} + {{- if .FunctionCall}} + {{toJson .FunctionCall}} + {{- end }} + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + + {{- end }}<|im_end|> + # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling + function: | + <|im_system|> + You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: + + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + + Use the following pydantic model json schema for each tool call you will make: + {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} + For each function call return a json object with function name and arguments within XML tags as follows: + + {'arguments': , 'name': } + <|im_end|> + {{.Input -}} + <|im_bot|> + + chat: | + {{.Input -}} + <|im_bot|> + completion: | + {{.Input}} + context_size: 4096 + f16: true + stopwords: + - <|im_end|> + - + - "\n" + - "\n\n\n" + diff --git a/gallery/parler-tts.yaml b/gallery/parler-tts.yaml new file mode 100644 index 00000000..76252b1d --- /dev/null +++ b/gallery/parler-tts.yaml @@ -0,0 +1,2 @@ +config_file: | + backend: parler-tts diff --git a/gallery/rerankers.yaml b/gallery/rerankers.yaml new file mode 100644 index 00000000..dbbad5a0 --- /dev/null +++ b/gallery/rerankers.yaml @@ -0,0 +1,2 @@ +config_file: | + backend: rerankers \ No newline at end of file diff --git a/pkg/model/loader_test.go b/pkg/model/loader_test.go index d3956b63..c0768051 100644 --- a/pkg/model/loader_test.go +++ b/pkg/model/loader_test.go @@ -24,8 +24,7 @@ const chatML = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq {{- else if eq .RoleName "tool" }} -{{- end }} -<|im_end|>` +{{- end }}<|im_end|>` const llama3 = `<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|> @@ -107,7 +106,7 @@ var llama3TestMatch map[string]map[string]interface{} = map[string]map[string]in var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{ "user": { "template": chatML, - "expected": "<|im_start|>user\nA long time ago in a galaxy far, far away...\n<|im_end|>", + "expected": "<|im_start|>user\nA long time ago in a galaxy far, far away...<|im_end|>", "data": model.ChatMessageTemplateData{ SystemPrompt: "", Role: "user", @@ -122,7 +121,7 @@ var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]in }, "assistant": { "template": chatML, - "expected": "<|im_start|>assistant\nA long time ago in a galaxy far, far away...\n<|im_end|>", + "expected": "<|im_start|>assistant\nA long time ago in a galaxy far, far away...<|im_end|>", "data": model.ChatMessageTemplateData{ SystemPrompt: "", Role: "assistant", @@ -137,7 +136,7 @@ var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]in }, "function_call": { "template": chatML, - "expected": "<|im_start|>assistant\n\n{\"function\":\"test\"}\n\n<|im_end|>", + "expected": "<|im_start|>assistant\n\n{\"function\":\"test\"}\n<|im_end|>", "data": model.ChatMessageTemplateData{ SystemPrompt: "", Role: "assistant", @@ -152,7 +151,7 @@ var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]in }, "function_response": { "template": chatML, - "expected": "<|im_start|>tool\n\nResponse from tool\n\n<|im_end|>", + "expected": "<|im_start|>tool\n\nResponse from tool\n<|im_end|>", "data": model.ChatMessageTemplateData{ SystemPrompt: "", Role: "tool", From 758b0c904294d397d540cdc31a40de25945beb99 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 25 Apr 2024 06:49:29 +0000 Subject: [PATCH 0369/2895] build(deps): bump pydantic from 1.10.7 to 1.10.13 in /examples/langchain/langchainpy-localai-example in the pip group across 1 directory (#2125) build(deps): bump pydantic Bumps the pip group with 1 update in the /examples/langchain/langchainpy-localai-example directory: [pydantic](https://github.com/pydantic/pydantic). Updates `pydantic` from 1.10.7 to 1.10.13 - [Release notes](https://github.com/pydantic/pydantic/releases) - [Changelog](https://github.com/pydantic/pydantic/blob/main/HISTORY.md) - [Commits](https://github.com/pydantic/pydantic/compare/v1.10.7...v1.10.13) --- updated-dependencies: - dependency-name: pydantic dependency-type: direct:production dependency-group: pip ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain/langchainpy-localai-example/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index ba7f8429..68a960a6 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -20,7 +20,7 @@ numpy==1.24.3 openai==0.27.6 openapi-schema-pydantic==1.2.4 packaging==23.1 -pydantic==1.10.7 +pydantic==1.10.13 PyYAML==6.0 requests==2.31.0 SQLAlchemy==2.0.12 From 60690c9fc4da2246b006b4a8c95355431d4ec20a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 Apr 2024 15:11:01 +0200 Subject: [PATCH 0370/2895] ci: add swagger pipeline Signed-off-by: Ettore Di Giacinto --- .github/workflows/update_swagger.yaml | 31 +++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 .github/workflows/update_swagger.yaml diff --git a/.github/workflows/update_swagger.yaml b/.github/workflows/update_swagger.yaml new file mode 100644 index 00000000..878f5a72 --- /dev/null +++ b/.github/workflows/update_swagger.yaml @@ -0,0 +1,31 @@ +name: Update swagger +on: + schedule: + - cron: 0 20 * * * + workflow_dispatch: +jobs: + swagger: + strategy: + fail-fast: false + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: 'stable' + - run: | + go install github.com/swaggo/swag/cmd/swag@latest + - name: Bump swagger 🔧 + run: | + make swagger + - name: Create Pull Request + uses: peter-evans/create-pull-request@v6 + with: + token: ${{ secrets.UPDATE_BOT_TOKEN }} + push-to-fork: ci-forks/LocalAI + commit-message: 'feat(swagger): update swagger' + title: 'feat(swagger): update swagger' + branch: "update/swagger" + body: Update swagger + signoff: true + From aa8e1c63d523ef8333ab01e010629b0848d4ded5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 Apr 2024 15:52:52 +0200 Subject: [PATCH 0371/2895] Create yaml-check.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/yaml-check.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 .github/workflows/yaml-check.yml diff --git a/.github/workflows/yaml-check.yml b/.github/workflows/yaml-check.yml new file mode 100644 index 00000000..e2c45718 --- /dev/null +++ b/.github/workflows/yaml-check.yml @@ -0,0 +1,20 @@ +name: json-yaml-validate +on: + push: + branches: + - main + pull_request: + workflow_dispatch: + +permissions: + contents: read + +jobs: + json-yaml-validate: + runs-on: 
ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: json-yaml-validate + id: json-yaml-validate + uses: GrantBirki/json-yaml-validate@v2.7.1 From 1b0a64aa46ed5c29828b09452e5ecadc5b71cbbf Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 Apr 2024 15:57:06 +0200 Subject: [PATCH 0372/2895] Update yaml-check.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/yaml-check.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/yaml-check.yml b/.github/workflows/yaml-check.yml index e2c45718..f83f03aa 100644 --- a/.github/workflows/yaml-check.yml +++ b/.github/workflows/yaml-check.yml @@ -18,3 +18,5 @@ jobs: - name: json-yaml-validate id: json-yaml-validate uses: GrantBirki/json-yaml-validate@v2.7.1 + with: + base_dir: ./gallery From 5d170e926461d4f73d5e92655b98d50175876268 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 Apr 2024 16:05:02 +0200 Subject: [PATCH 0373/2895] Update yaml-check.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/yaml-check.yml | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/.github/workflows/yaml-check.yml b/.github/workflows/yaml-check.yml index f83f03aa..68b5e987 100644 --- a/.github/workflows/yaml-check.yml +++ b/.github/workflows/yaml-check.yml @@ -1,22 +1,18 @@ -name: json-yaml-validate +name: 'Yamllint GitHub Actions' on: - push: - branches: - - main - pull_request: - workflow_dispatch: - -permissions: - contents: read - + - pull_request jobs: - json-yaml-validate: + yamllint: + name: 'Yamllint' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - - name: json-yaml-validate - id: json-yaml-validate - uses: GrantBirki/json-yaml-validate@v2.7.1 + - name: 'Checkout' + uses: actions/checkout@master + - name: 'Yamllint' + uses: karancode/yamllint-github-action@master with: - base_dir: ./gallery + yamllint_file_or_dir: 'gallery' + yamllint_strict: false + yamllint_comment: true + env: + GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 2ada13b1add9cd6eb126517f85f27f395bfdd921 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 Apr 2024 16:06:18 +0200 Subject: [PATCH 0374/2895] models(gallery): add more models (#2129) Signed-off-by: Ettore Di Giacinto --- gallery/cerbero.yaml | 19 ++++++ gallery/index.yaml | 139 ++++++++++++++++++++++++++++++++++++++- gallery/vicuna-chat.yaml | 21 ++++++ 3 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 gallery/cerbero.yaml create mode 100644 gallery/vicuna-chat.yaml diff --git a/gallery/cerbero.yaml b/gallery/cerbero.yaml new file mode 100644 index 00000000..265d4019 --- /dev/null +++ b/gallery/cerbero.yaml @@ -0,0 +1,19 @@ +config_file: | + backend: llama-cpp + context_size: 8192 + f16: false + name: cerbero + + template: + completion: "{{.Input}}" + chat: "Questa è una conversazione tra un umano ed un assistente AI.\n{{.Input}}\n[|Assistente|] " + roles: + user: "[|Umano|] " + system: "[|Umano|] " + assistant: "[|Assistente|] " + + stopwords: + - "[|Umano|]" + + trimsuffix: + - "\n" diff --git a/gallery/index.yaml b/gallery/index.yaml index a5de760d..8edb9df1 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -82,6 +82,65 @@ - filename: Meta-Llama-3-70B-Instruct.Q4_K_M.gguf sha256: d559de8dd806a76dbd29f8d8bd04666f2b29e7c7872d8e8481abd07805884d72 uri: huggingface://MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct.Q4_K_M.gguf +- <<: *llama3 + name: "llama-3-sauerkrautlm-8b-instruct" + urls: + - 
https://huggingface.co/bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF + icon: https://vago-solutions.ai/wp-content/uploads/2024/04/Llama3-Pic.png + description: | + SauerkrautLM-llama-3-8B-Instruct + + Model Type: Llama-3-SauerkrautLM-8b-Instruct is a finetuned Model based on meta-llama/Meta-Llama-3-8B-Instruct + Language(s): German, English + overrides: + parameters: + model: Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf + files: + - filename: Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf + sha256: 5833d99d5596cade0d02e61cddaa6dac49170864ee56d0b602933c6f9fbae314 + uri: huggingface://bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF/Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf +- <<: *llama3 + name: "llama-3-13b-instruct-v0.1" + urls: + - https://huggingface.co/MaziyarPanahi/Llama-3-13B-Instruct-v0.1-GGUF + icon: https://huggingface.co/MaziyarPanahi/Llama-3-13B-Instruct-v0.1/resolve/main/llama-3-merges.webp + description: | + This model is a self-merge of meta-llama/Meta-Llama-3-8B-Instruct model. + overrides: + parameters: + model: Llama-3-13B-Instruct-v0.1.Q4_K_M.gguf + files: + - filename: Llama-3-13B-Instruct-v0.1.Q4_K_M.gguf + sha256: 071a28043c271d259b5ffa883d19a9e0b33269b55148c4abaf5f95da4d084266 + uri: huggingface://MaziyarPanahi/Llama-3-13B-Instruct-v0.1-GGUF/Llama-3-13B-Instruct-v0.1.Q4_K_M.gguf +- <<: *llama3 + name: "llama-3-smaug-8b" + urls: + - https://huggingface.co/MaziyarPanahi/Llama-3-Smaug-8B-GGUF + icon: https://cdn-uploads.huggingface.co/production/uploads/64c14f95cac5f9ba52bbcd7f/OrcJyTaUtD2HxJOPPwNva.png + description: | + This model was built using the Smaug recipe for improving performance on real world multi-turn conversations applied to meta-llama/Meta-Llama-3-8B. + overrides: + parameters: + model: Llama-3-Smaug-8B.Q4_K_M.gguf + files: + - filename: Llama-3-Smaug-8B.Q4_K_M.gguf + sha256: b17c4c1144768ead9e8a96439165baf49e98c53d458b4da8827f137fbabf38c1 + uri: huggingface://MaziyarPanahi/Llama-3-Smaug-8B-GGUF/Llama-3-Smaug-8B.Q4_K_M.gguf +- <<: *llama3 + name: "llama-3-8b-openhermes-dpo" + urls: + - https://huggingface.co/mradermacher/Llama3-8B-OpenHermes-DPO-GGUF + icon: https://cdn-uploads.huggingface.co/production/uploads/64fc6d81d75293f417fee1d1/QF2OsDu9DJKP4QYPBu4aK.png + description: | + Llama3-8B-OpenHermes-DPO is DPO-Finetuned model of Llama3-8B, on the OpenHermes-2.5 preference dataset using QLoRA. + overrides: + parameters: + model: Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf + files: + - filename: Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf + sha256: 1147e5881cb1d67796916e6cab7dab0ae0f532a4c1e626c9e92861e5f67752ca + uri: huggingface://mradermacher/Llama3-8B-OpenHermes-DPO-GGUF/Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf - <<: *llama3 name: "llama-3-unholy-8b" urls: @@ -100,6 +159,42 @@ - filename: Llama-3-Unholy-8B.q4_k_m.gguf sha256: 17b7f716bce1b34d4aa99ee730a19a834f8c77ddb36090dde5a1eda963f93602 uri: huggingface://Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q4_k_m.gguf +- <<: *llama3 + name: "lexi-llama-3-8b-uncensored" + urls: + - https://huggingface.co/NikolayKozloff/Lexi-Llama-3-8B-Uncensored-Q6_K-GGUF + icon: https://cdn-uploads.huggingface.co/production/uploads/644ad182f434a6a63b18eee6/H6axm5mlmiOWnbIFvx_em.png + description: | + Lexi is uncensored, which makes the model compliant. You are advised to implement your own alignment layer before exposing the model as a service. It will be highly compliant with any requests, even unethical ones. + + You are responsible for any content you create using this model. Please use it responsibly. 
+ + Lexi is licensed according to Meta's Llama license. I grant permission for any use, including commercial, that falls within accordance with Meta's Llama-3 license. + overrides: + parameters: + model: lexi-llama-3-8b-uncensored.Q6_K.gguf + files: + - filename: lexi-llama-3-8b-uncensored.Q6_K.gguf + sha256: 5805f3856cc18a769fae0b7c5659fe6778574691c370c910dad6eeec62c62436 + uri: huggingface://NikolayKozloff/Lexi-Llama-3-8B-Uncensored-Q6_K-GGUF/lexi-llama-3-8b-uncensored.Q6_K.gguf +- <<: *llama3 + name: "chaos-rp_l3_b-iq-imatrix" + urls: + - https://huggingface.co/Lewdiculous/Chaos_RP_l3_8B-GGUF-IQ-Imatrix + icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/u5p9kdbXT2QQA3iMU0vF1.png + description: | + A chaotic force beckons for you, will you heed her call? + + Built upon an intelligent foundation and tuned for roleplaying, this model will fulfill your wildest fantasies with the bare minimum of effort. + + Enjoy! + overrides: + parameters: + model: Chaos_RP_l3_8B-Q4_K_M-imat.gguf + files: + - filename: Chaos_RP_l3_8B-Q4_K_M-imat.gguf + sha256: 4273c5a8f23d49bf6294e620a5aa1fcd78d491ea0b90d0ec63ad708eedb83893 + uri: huggingface://Lewdiculous/Chaos_RP_l3_8B-GGUF-IQ-Imatrix/Chaos_RP_l3_8B-Q4_K_M-imat.gguf - <<: *llama3 name: "llama-3-unholy-8b:Q8_0" urls: @@ -199,7 +294,30 @@ sha256: 8aac72a0bd72c075ba7be1aa29945e47b07d39cd16be9a80933935f51b57fb32 uri: huggingface://cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q6_K.gguf ## LLama2 and derivatives +### Start Fimbulvetr +- &vicuna-chat + url: "github:mudler/LocalAI/gallery/vicuna-chat.yaml@master" + name: "fimbulvetr-11b-v2" + icon: https://huggingface.co/Sao10K/Fimbulvetr-11B-v2/resolve/main/cute1.jpg + license: llama2 + description: | + Cute girl to catch your attention. + urls: + - https://huggingface.co/Sao10K/Fimbulvetr-11B-v2-GGUF + tags: + - llm + - gguf + - gpu + - cpu + - llama3 + overrides: + parameters: + model: Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf + files: + - filename: Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf + sha256: 3597dacfb0ab717d565d8a4d6067f10dcb0e26cc7f21c832af1a10a87882a8fd + uri: huggingface://Sao10K/Fimbulvetr-11B-v2-GGUF/Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf ### Start noromaid - &noromaid url: "github:mudler/LocalAI/gallery/noromaid.yaml@master" @@ -436,7 +554,26 @@ sha256: "b6d95d7ec9a395b7568cc94b0447fd4f90b6f69d6e44794b1fbb84e3f732baca" uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q8_0.gguf" ### END Hermes-2-Pro-Mistral - +### START Cerbero +- url: "github:mudler/LocalAI/gallery/cerbero.yaml@master" + icon: https://huggingface.co/galatolo/cerbero-7b/resolve/main/README.md.d/cerbero.png + description: | + cerbero-7b is specifically crafted to fill the void in Italy's AI landscape. 
+ urls: + - https://huggingface.co/galatolo/cerbero-7b + tags: + - llm + - gguf + - gpu + - cpu + - mistral + overrides: + parameters: + model: galatolo-Q4_K.gguf + files: + - filename: "galatolo-Q4_K.gguf" + sha256: "ca0cfd5a9ad40dc16416aa3a277015d0299b62c0803b67f5709580042202c172" + uri: "huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q4_K.gguf" ### START Codellama - &codellama url: "github:mudler/LocalAI/gallery/codellama.yaml@master" diff --git a/gallery/vicuna-chat.yaml b/gallery/vicuna-chat.yaml new file mode 100644 index 00000000..9669cce0 --- /dev/null +++ b/gallery/vicuna-chat.yaml @@ -0,0 +1,21 @@ +name: "vicuna-chat" + +description: | + Vicuna chat + +license: "LLaMA" + +config_file: | + backend: llama-cpp + context_size: 4096 + roles: + user: "User: " + system: "System: " + assistant: "Assistant: " + f16: true + template: + completion: | + Complete the following sentence: {{.Input}} + chat: | + {{.Input}} + ASSISTANT: \ No newline at end of file From 4ae4e4450697ae3ff51f159fee4eeab83aeb08c4 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 25 Apr 2024 16:10:08 +0200 Subject: [PATCH 0375/2895] feat(swagger): update swagger (#2128) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- swagger/docs.go | 35 +++++++++++++++++------------------ swagger/swagger.json | 32 ++++++++++++++++---------------- swagger/swagger.yaml | 32 ++++++++++++++++---------------- 3 files changed, 49 insertions(+), 50 deletions(-) diff --git a/swagger/docs.go b/swagger/docs.go index e0199673..cc4fe085 100644 --- a/swagger/docs.go +++ b/swagger/docs.go @@ -1,5 +1,4 @@ -// Code generated by swaggo/swag. DO NOT EDIT. - +// Package swagger Code generated by swaggo/swag. 
DO NOT EDIT package swagger import "github.com/swaggo/swag" @@ -235,7 +234,7 @@ const docTemplate = `{ } }, "definitions": { - "grammar.Argument": { + "functions.Argument": { "type": "object", "properties": { "properties": { @@ -247,7 +246,7 @@ const docTemplate = `{ } } }, - "grammar.Function": { + "functions.Function": { "type": "object", "properties": { "description": { @@ -262,7 +261,7 @@ const docTemplate = `{ } } }, - "grammar.FunctionName": { + "functions.FunctionName": { "type": "object", "properties": { "const": { @@ -270,18 +269,18 @@ const docTemplate = `{ } } }, - "grammar.Item": { + "functions.Item": { "type": "object", "properties": { "properties": { - "$ref": "#/definitions/grammar.Properties" + "$ref": "#/definitions/functions.Properties" }, "type": { "type": "string" } } }, - "grammar.JSONFunctionStructure": { + "functions.JSONFunctionStructure": { "type": "object", "properties": { "$defs": { @@ -291,33 +290,33 @@ const docTemplate = `{ "anyOf": { "type": "array", "items": { - "$ref": "#/definitions/grammar.Item" + "$ref": "#/definitions/functions.Item" } }, "oneOf": { "type": "array", "items": { - "$ref": "#/definitions/grammar.Item" + "$ref": "#/definitions/functions.Item" } } } }, - "grammar.Properties": { + "functions.Properties": { "type": "object", "properties": { "arguments": { - "$ref": "#/definitions/grammar.Argument" + "$ref": "#/definitions/functions.Argument" }, "function": { - "$ref": "#/definitions/grammar.FunctionName" + "$ref": "#/definitions/functions.FunctionName" } } }, - "grammar.Tool": { + "functions.Tool": { "type": "object", "properties": { "function": { - "$ref": "#/definitions/grammar.Function" + "$ref": "#/definitions/functions.Function" }, "type": { "type": "string" @@ -565,7 +564,7 @@ const docTemplate = `{ "description": "A list of available functions to call", "type": "array", "items": { - "$ref": "#/definitions/grammar.Function" + "$ref": "#/definitions/functions.Function" } }, "grammar": { @@ -573,7 +572,7 @@ const docTemplate = `{ "type": "string" }, "grammar_json_functions": { - "$ref": "#/definitions/grammar.JSONFunctionStructure" + "$ref": "#/definitions/functions.JSONFunctionStructure" }, "ignore_eos": { "type": "boolean" @@ -673,7 +672,7 @@ const docTemplate = `{ "tools": { "type": "array", "items": { - "$ref": "#/definitions/grammar.Tool" + "$ref": "#/definitions/functions.Tool" } }, "top_k": { diff --git a/swagger/swagger.json b/swagger/swagger.json index 4d7102c4..d7febeb3 100644 --- a/swagger/swagger.json +++ b/swagger/swagger.json @@ -227,7 +227,7 @@ } }, "definitions": { - "grammar.Argument": { + "functions.Argument": { "type": "object", "properties": { "properties": { @@ -239,7 +239,7 @@ } } }, - "grammar.Function": { + "functions.Function": { "type": "object", "properties": { "description": { @@ -254,7 +254,7 @@ } } }, - "grammar.FunctionName": { + "functions.FunctionName": { "type": "object", "properties": { "const": { @@ -262,18 +262,18 @@ } } }, - "grammar.Item": { + "functions.Item": { "type": "object", "properties": { "properties": { - "$ref": "#/definitions/grammar.Properties" + "$ref": "#/definitions/functions.Properties" }, "type": { "type": "string" } } }, - "grammar.JSONFunctionStructure": { + "functions.JSONFunctionStructure": { "type": "object", "properties": { "$defs": { @@ -283,33 +283,33 @@ "anyOf": { "type": "array", "items": { - "$ref": "#/definitions/grammar.Item" + "$ref": "#/definitions/functions.Item" } }, "oneOf": { "type": "array", "items": { - "$ref": "#/definitions/grammar.Item" + "$ref": 
"#/definitions/functions.Item" } } } }, - "grammar.Properties": { + "functions.Properties": { "type": "object", "properties": { "arguments": { - "$ref": "#/definitions/grammar.Argument" + "$ref": "#/definitions/functions.Argument" }, "function": { - "$ref": "#/definitions/grammar.FunctionName" + "$ref": "#/definitions/functions.FunctionName" } } }, - "grammar.Tool": { + "functions.Tool": { "type": "object", "properties": { "function": { - "$ref": "#/definitions/grammar.Function" + "$ref": "#/definitions/functions.Function" }, "type": { "type": "string" @@ -557,7 +557,7 @@ "description": "A list of available functions to call", "type": "array", "items": { - "$ref": "#/definitions/grammar.Function" + "$ref": "#/definitions/functions.Function" } }, "grammar": { @@ -565,7 +565,7 @@ "type": "string" }, "grammar_json_functions": { - "$ref": "#/definitions/grammar.JSONFunctionStructure" + "$ref": "#/definitions/functions.JSONFunctionStructure" }, "ignore_eos": { "type": "boolean" @@ -665,7 +665,7 @@ "tools": { "type": "array", "items": { - "$ref": "#/definitions/grammar.Tool" + "$ref": "#/definitions/functions.Tool" } }, "top_k": { diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index 86caff8a..919dd896 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -1,6 +1,6 @@ basePath: / definitions: - grammar.Argument: + functions.Argument: properties: properties: additionalProperties: true @@ -8,7 +8,7 @@ definitions: type: type: string type: object - grammar.Function: + functions.Function: properties: description: type: string @@ -18,43 +18,43 @@ definitions: additionalProperties: true type: object type: object - grammar.FunctionName: + functions.FunctionName: properties: const: type: string type: object - grammar.Item: + functions.Item: properties: properties: - $ref: '#/definitions/grammar.Properties' + $ref: '#/definitions/functions.Properties' type: type: string type: object - grammar.JSONFunctionStructure: + functions.JSONFunctionStructure: properties: $defs: additionalProperties: true type: object anyOf: items: - $ref: '#/definitions/grammar.Item' + $ref: '#/definitions/functions.Item' type: array oneOf: items: - $ref: '#/definitions/grammar.Item' + $ref: '#/definitions/functions.Item' type: array type: object - grammar.Properties: + functions.Properties: properties: arguments: - $ref: '#/definitions/grammar.Argument' + $ref: '#/definitions/functions.Argument' function: - $ref: '#/definitions/grammar.FunctionName' + $ref: '#/definitions/functions.FunctionName' type: object - grammar.Tool: + functions.Tool: properties: function: - $ref: '#/definitions/grammar.Function' + $ref: '#/definitions/functions.Function' type: type: string type: object @@ -221,13 +221,13 @@ definitions: functions: description: A list of available functions to call items: - $ref: '#/definitions/grammar.Function' + $ref: '#/definitions/functions.Function' type: array grammar: description: A grammar to constrain the LLM output type: string grammar_json_functions: - $ref: '#/definitions/grammar.JSONFunctionStructure' + $ref: '#/definitions/functions.JSONFunctionStructure' ignore_eos: type: boolean input: {} @@ -297,7 +297,7 @@ definitions: tool_choice: {} tools: items: - $ref: '#/definitions/grammar.Tool' + $ref: '#/definitions/functions.Tool' type: array top_k: type: integer From 45761f8be22075a85134ce79e5070f92430c3f3b Mon Sep 17 00:00:00 2001 From: Dave Date: Thu, 25 Apr 2024 13:25:56 -0400 Subject: [PATCH 0376/2895] fix: yamlint warnings and errors (#2131) fix yamlint warnings and errors Signed-off-by: 
Dave Lee --- .yamllint | 4 + gallery/bert-embeddings.yaml | 7 +- gallery/cerbero.yaml | 3 +- gallery/codellama.yaml | 3 +- gallery/dreamshaper.yaml | 2 +- gallery/hermes-2-pro-mistral.yaml | 3 +- gallery/index.yaml | 528 +++++++++++++++--------------- gallery/llama3-instruct.yaml | 2 +- gallery/llava.yaml | 2 +- gallery/noromaid.yaml | 2 +- gallery/parler-tts.yaml | 1 + gallery/phi-2-chat.yaml | 3 +- gallery/phi-2-orange.yaml | 1 + gallery/phi-3-chat.yaml | 2 +- gallery/piper.yaml | 1 + gallery/rerankers.yaml | 3 +- gallery/sentencetransformers.yaml | 3 +- gallery/stablediffusion.yaml | 79 ++--- gallery/tinydream.yaml | 3 +- gallery/vicuna-chat.yaml | 3 +- gallery/virtual.yaml | 3 +- gallery/whisper-base.yaml | 8 +- 22 files changed, 340 insertions(+), 326 deletions(-) create mode 100644 .yamllint diff --git a/.yamllint b/.yamllint new file mode 100644 index 00000000..8b8a89eb --- /dev/null +++ b/.yamllint @@ -0,0 +1,4 @@ +extends: default + +rules: + line-length: disable \ No newline at end of file diff --git a/gallery/bert-embeddings.yaml b/gallery/bert-embeddings.yaml index 01f05f33..7ce61799 100644 --- a/gallery/bert-embeddings.yaml +++ b/gallery/bert-embeddings.yaml @@ -1,3 +1,4 @@ +--- name: "bert-embeddings" config_file: | @@ -6,6 +7,6 @@ config_file: | backend: bert-embeddings embeddings: true files: -- filename: "bert-MiniLM-L6-v2q4_0.bin" - sha256: "a5a174d8772c8a569faf9f3136c441f2c3855b5bf35ed32274294219533feaad" - uri: "https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin" \ No newline at end of file + - filename: "bert-MiniLM-L6-v2q4_0.bin" + sha256: "a5a174d8772c8a569faf9f3136c441f2c3855b5bf35ed32274294219533feaad" + uri: "https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin" diff --git a/gallery/cerbero.yaml b/gallery/cerbero.yaml index 265d4019..e3e857b9 100644 --- a/gallery/cerbero.yaml +++ b/gallery/cerbero.yaml @@ -1,3 +1,4 @@ +--- config_file: | backend: llama-cpp context_size: 8192 @@ -15,5 +16,5 @@ config_file: | stopwords: - "[|Umano|]" - trimsuffix: + trimsuffix: - "\n" diff --git a/gallery/codellama.yaml b/gallery/codellama.yaml index a4c3233f..b02ad87e 100644 --- a/gallery/codellama.yaml +++ b/gallery/codellama.yaml @@ -1,7 +1,8 @@ +--- name: "codellama" config_file: | backend: llama-cpp context_size: 4096 f16: true - mmap: true \ No newline at end of file + mmap: true diff --git a/gallery/dreamshaper.yaml b/gallery/dreamshaper.yaml index 219a1e53..03146280 100644 --- a/gallery/dreamshaper.yaml +++ b/gallery/dreamshaper.yaml @@ -1,6 +1,6 @@ +--- name: "dreamshaper" - config_file: | backend: diffusers step: 25 diff --git a/gallery/hermes-2-pro-mistral.yaml b/gallery/hermes-2-pro-mistral.yaml index b1dc0ff1..6abee631 100644 --- a/gallery/hermes-2-pro-mistral.yaml +++ b/gallery/hermes-2-pro-mistral.yaml @@ -1,6 +1,6 @@ +--- name: "hermes-2-pro-mistral" - config_file: | mmap: true template: @@ -52,4 +52,3 @@ config_file: | - - "\n" - "\n\n\n" - diff --git a/gallery/index.yaml b/gallery/index.yaml index 8edb9df1..56e434c5 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1,4 +1,4 @@ - +--- ### START parler-tts - &parler-tts url: "github:mudler/LocalAI/gallery/parler-tts.yaml@master" @@ -9,13 +9,13 @@ description: | Parler-TTS is a lightweight text-to-speech (TTS) model that can generate high-quality, natural sounding speech in the style of a given speaker (gender, pitch, speaking style, etc). 
It is a reproduction of work from the paper Natural language guidance of high-fidelity text-to-speech with synthetic annotations by Dan Lyth and Simon King, from Stability AI and Edinburgh University respectively. urls: - - https://github.com/huggingface/parler-tts + - https://github.com/huggingface/parler-tts tags: - - tts - - gpu - - cpu - - text-to-speech - - python + - tts + - gpu + - cpu + - text-to-speech + - python ### START rerankers - &rerankers url: "github:mudler/LocalAI/gallery/rerankers.yaml@master" @@ -26,9 +26,9 @@ description: | A cross-encoder model that can be used for reranking tags: - - reranker - - gpu - - python + - reranker + - gpu + - python ## LLMs ### START LLAMA3 - &llama3 @@ -49,43 +49,43 @@ Model Architecture Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. urls: - - https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct - - https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF + - https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct + - https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF tags: - - llm - - gguf - - gpu - - cpu - - llama3 + - llm + - gguf + - gpu + - cpu + - llama3 overrides: parameters: model: Meta-Llama-3-8B-Instruct-Q5_K_M.gguf files: - - filename: Meta-Llama-3-8B-Instruct.Q4_0.gguf - sha256: 19ded996fe6c60254dc7544d782276eff41046ed42aa5f2d0005dc457e5c0895 - uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q4_0.gguf + - filename: Meta-Llama-3-8B-Instruct.Q4_0.gguf + sha256: 19ded996fe6c60254dc7544d782276eff41046ed42aa5f2d0005dc457e5c0895 + uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q4_0.gguf - <<: *llama3 name: "llama3-8b-instruct:Q6_K" overrides: parameters: model: Meta-Llama-3-8B-Instruct.Q6_K.gguf files: - - filename: Meta-Llama-3-8B-Instruct.Q6_K.gguf - sha256: b7bad45618e2a76cc1e89a0fbb93a2cac9bf410e27a619c8024ed6db53aa9b4a - uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q6_K.gguf + - filename: Meta-Llama-3-8B-Instruct.Q6_K.gguf + sha256: b7bad45618e2a76cc1e89a0fbb93a2cac9bf410e27a619c8024ed6db53aa9b4a + uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q6_K.gguf - <<: *llama3 name: "llama3-70b-instruct" overrides: parameters: model: Meta-Llama-3-70B-Instruct.Q4_K_M.gguf files: - - filename: Meta-Llama-3-70B-Instruct.Q4_K_M.gguf - sha256: d559de8dd806a76dbd29f8d8bd04666f2b29e7c7872d8e8481abd07805884d72 - uri: huggingface://MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct.Q4_K_M.gguf + - filename: Meta-Llama-3-70B-Instruct.Q4_K_M.gguf + sha256: d559de8dd806a76dbd29f8d8bd04666f2b29e7c7872d8e8481abd07805884d72 + uri: huggingface://MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct.Q4_K_M.gguf - <<: *llama3 name: "llama-3-sauerkrautlm-8b-instruct" urls: - - https://huggingface.co/bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF + - https://huggingface.co/bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF icon: https://vago-solutions.ai/wp-content/uploads/2024/04/Llama3-Pic.png description: | SauerkrautLM-llama-3-8B-Instruct @@ -96,13 +96,13 @@ parameters: model: Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf files: - - filename: Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf - sha256: 
5833d99d5596cade0d02e61cddaa6dac49170864ee56d0b602933c6f9fbae314 - uri: huggingface://bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF/Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf + - filename: Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf + sha256: 5833d99d5596cade0d02e61cddaa6dac49170864ee56d0b602933c6f9fbae314 + uri: huggingface://bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF/Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf - <<: *llama3 name: "llama-3-13b-instruct-v0.1" urls: - - https://huggingface.co/MaziyarPanahi/Llama-3-13B-Instruct-v0.1-GGUF + - https://huggingface.co/MaziyarPanahi/Llama-3-13B-Instruct-v0.1-GGUF icon: https://huggingface.co/MaziyarPanahi/Llama-3-13B-Instruct-v0.1/resolve/main/llama-3-merges.webp description: | This model is a self-merge of meta-llama/Meta-Llama-3-8B-Instruct model. @@ -110,13 +110,13 @@ parameters: model: Llama-3-13B-Instruct-v0.1.Q4_K_M.gguf files: - - filename: Llama-3-13B-Instruct-v0.1.Q4_K_M.gguf - sha256: 071a28043c271d259b5ffa883d19a9e0b33269b55148c4abaf5f95da4d084266 - uri: huggingface://MaziyarPanahi/Llama-3-13B-Instruct-v0.1-GGUF/Llama-3-13B-Instruct-v0.1.Q4_K_M.gguf + - filename: Llama-3-13B-Instruct-v0.1.Q4_K_M.gguf + sha256: 071a28043c271d259b5ffa883d19a9e0b33269b55148c4abaf5f95da4d084266 + uri: huggingface://MaziyarPanahi/Llama-3-13B-Instruct-v0.1-GGUF/Llama-3-13B-Instruct-v0.1.Q4_K_M.gguf - <<: *llama3 name: "llama-3-smaug-8b" urls: - - https://huggingface.co/MaziyarPanahi/Llama-3-Smaug-8B-GGUF + - https://huggingface.co/MaziyarPanahi/Llama-3-Smaug-8B-GGUF icon: https://cdn-uploads.huggingface.co/production/uploads/64c14f95cac5f9ba52bbcd7f/OrcJyTaUtD2HxJOPPwNva.png description: | This model was built using the Smaug recipe for improving performance on real world multi-turn conversations applied to meta-llama/Meta-Llama-3-8B. @@ -124,13 +124,13 @@ parameters: model: Llama-3-Smaug-8B.Q4_K_M.gguf files: - - filename: Llama-3-Smaug-8B.Q4_K_M.gguf - sha256: b17c4c1144768ead9e8a96439165baf49e98c53d458b4da8827f137fbabf38c1 - uri: huggingface://MaziyarPanahi/Llama-3-Smaug-8B-GGUF/Llama-3-Smaug-8B.Q4_K_M.gguf + - filename: Llama-3-Smaug-8B.Q4_K_M.gguf + sha256: b17c4c1144768ead9e8a96439165baf49e98c53d458b4da8827f137fbabf38c1 + uri: huggingface://MaziyarPanahi/Llama-3-Smaug-8B-GGUF/Llama-3-Smaug-8B.Q4_K_M.gguf - <<: *llama3 name: "llama-3-8b-openhermes-dpo" urls: - - https://huggingface.co/mradermacher/Llama3-8B-OpenHermes-DPO-GGUF + - https://huggingface.co/mradermacher/Llama3-8B-OpenHermes-DPO-GGUF icon: https://cdn-uploads.huggingface.co/production/uploads/64fc6d81d75293f417fee1d1/QF2OsDu9DJKP4QYPBu4aK.png description: | Llama3-8B-OpenHermes-DPO is DPO-Finetuned model of Llama3-8B, on the OpenHermes-2.5 preference dataset using QLoRA. 
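Every hunk in this stretch applies the same mechanical change: sequence items under `urls:` and `files:` move from column 0 to a two-space indent, which is what yamllint's default indentation rule expects. The underlying gallery-entry schema is untouched. For reference, a minimal standalone entry follows the shape below — every name, checksum, and URI here is a placeholder for illustration, not part of the patch:

- name: "example-model"                # unique gallery name (placeholder)
  url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"  # base config template this entry extends
  urls:
    - https://huggingface.co/example/Example-Model-GGUF  # upstream model card (placeholder)
  overrides:
    parameters:
      model: Example-Model.Q4_K_M.gguf  # file the backend loads at runtime
  files:
    - filename: Example-Model.Q4_K_M.gguf
      sha256: 0000000000000000000000000000000000000000000000000000000000000000  # placeholder checksum
      uri: huggingface://example/Example-Model-GGUF/Example-Model.Q4_K_M.gguf

The two-space indentation of the `urls:` and `files:` sequences shown here is the style all of these hunks converge on.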
@@ -138,13 +138,13 @@ parameters: model: Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf files: - - filename: Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf - sha256: 1147e5881cb1d67796916e6cab7dab0ae0f532a4c1e626c9e92861e5f67752ca - uri: huggingface://mradermacher/Llama3-8B-OpenHermes-DPO-GGUF/Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf + - filename: Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf + sha256: 1147e5881cb1d67796916e6cab7dab0ae0f532a4c1e626c9e92861e5f67752ca + uri: huggingface://mradermacher/Llama3-8B-OpenHermes-DPO-GGUF/Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf - <<: *llama3 name: "llama-3-unholy-8b" urls: - - https://huggingface.co/Undi95/Llama-3-Unholy-8B-GGUF + - https://huggingface.co/Undi95/Llama-3-Unholy-8B-GGUF icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/JmdBlOHlBHVmX1IbZzWSv.png description: | Use at your own risk, I'm not responsible for any usage of this model, don't try to do anything this model tell you to do. @@ -156,13 +156,13 @@ parameters: model: Llama-3-Unholy-8B.q4_k_m.gguf files: - - filename: Llama-3-Unholy-8B.q4_k_m.gguf - sha256: 17b7f716bce1b34d4aa99ee730a19a834f8c77ddb36090dde5a1eda963f93602 - uri: huggingface://Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q4_k_m.gguf + - filename: Llama-3-Unholy-8B.q4_k_m.gguf + sha256: 17b7f716bce1b34d4aa99ee730a19a834f8c77ddb36090dde5a1eda963f93602 + uri: huggingface://Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q4_k_m.gguf - <<: *llama3 name: "lexi-llama-3-8b-uncensored" urls: - - https://huggingface.co/NikolayKozloff/Lexi-Llama-3-8B-Uncensored-Q6_K-GGUF + - https://huggingface.co/NikolayKozloff/Lexi-Llama-3-8B-Uncensored-Q6_K-GGUF icon: https://cdn-uploads.huggingface.co/production/uploads/644ad182f434a6a63b18eee6/H6axm5mlmiOWnbIFvx_em.png description: | Lexi is uncensored, which makes the model compliant. You are advised to implement your own alignment layer before exposing the model as a service. It will be highly compliant with any requests, even unethical ones. @@ -174,13 +174,13 @@ parameters: model: lexi-llama-3-8b-uncensored.Q6_K.gguf files: - - filename: lexi-llama-3-8b-uncensored.Q6_K.gguf - sha256: 5805f3856cc18a769fae0b7c5659fe6778574691c370c910dad6eeec62c62436 - uri: huggingface://NikolayKozloff/Lexi-Llama-3-8B-Uncensored-Q6_K-GGUF/lexi-llama-3-8b-uncensored.Q6_K.gguf + - filename: lexi-llama-3-8b-uncensored.Q6_K.gguf + sha256: 5805f3856cc18a769fae0b7c5659fe6778574691c370c910dad6eeec62c62436 + uri: huggingface://NikolayKozloff/Lexi-Llama-3-8B-Uncensored-Q6_K-GGUF/lexi-llama-3-8b-uncensored.Q6_K.gguf - <<: *llama3 name: "chaos-rp_l3_b-iq-imatrix" urls: - - https://huggingface.co/Lewdiculous/Chaos_RP_l3_8B-GGUF-IQ-Imatrix + - https://huggingface.co/Lewdiculous/Chaos_RP_l3_8B-GGUF-IQ-Imatrix icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/u5p9kdbXT2QQA3iMU0vF1.png description: | A chaotic force beckons for you, will you heed her call? 
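A note on the `<<: *llama3` lines threaded through these hunks: they are standard YAML merge keys. The first entry of a family is declared with an anchor (`- &llama3 url: ...`), and every variant aliases it, inheriting all of its fields and overriding only what differs. A reduced sketch with hypothetical names:

- &example-family                 # anchor: marks this mapping for reuse
  url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
  license: llama3
  tags:
    - llm
    - gguf
- <<: *example-family             # merge key: pull in every field from the anchor
  name: "example-family:Q6_K"     # ...then override or add only what differs
  overrides:
    parameters:
      model: Example-Family.Q6_K.gguf  # placeholder quantization variant

This is how the gallery keeps per-quantization variants such as `llama3-8b-instruct:Q6_K` in sync with their base entries.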
@@ -192,13 +192,13 @@ parameters: model: Chaos_RP_l3_8B-Q4_K_M-imat.gguf files: - - filename: Chaos_RP_l3_8B-Q4_K_M-imat.gguf - sha256: 4273c5a8f23d49bf6294e620a5aa1fcd78d491ea0b90d0ec63ad708eedb83893 - uri: huggingface://Lewdiculous/Chaos_RP_l3_8B-GGUF-IQ-Imatrix/Chaos_RP_l3_8B-Q4_K_M-imat.gguf + - filename: Chaos_RP_l3_8B-Q4_K_M-imat.gguf + sha256: 4273c5a8f23d49bf6294e620a5aa1fcd78d491ea0b90d0ec63ad708eedb83893 + uri: huggingface://Lewdiculous/Chaos_RP_l3_8B-GGUF-IQ-Imatrix/Chaos_RP_l3_8B-Q4_K_M-imat.gguf - <<: *llama3 name: "llama-3-unholy-8b:Q8_0" urls: - - https://huggingface.co/Undi95/Llama-3-Unholy-8B-GGUF + - https://huggingface.co/Undi95/Llama-3-Unholy-8B-GGUF icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/JmdBlOHlBHVmX1IbZzWSv.png description: | Use at your own risk, I'm not responsible for any usage of this model, don't try to do anything this model tell you to do. @@ -210,20 +210,20 @@ parameters: model: Llama-3-Unholy-8B.q8_0.gguf files: - - filename: Llama-3-Unholy-8B.q8_0.gguf - sha256: 8d4137018acdcd57df4beccc84d9ad3f7f08cac50588f76370afc16c85752702 - uri: huggingface://Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q8_0.gguf + - filename: Llama-3-Unholy-8B.q8_0.gguf + sha256: 8d4137018acdcd57df4beccc84d9ad3f7f08cac50588f76370afc16c85752702 + uri: huggingface://Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q8_0.gguf - <<: *llama3 name: "therapyllama-8b-v1" urls: - - https://huggingface.co/victunes/TherapyLlama-8B-v1-GGUF + - https://huggingface.co/victunes/TherapyLlama-8B-v1-GGUF icon: https://cdn-uploads.huggingface.co/production/uploads/65f07d05279d2d8f725bf0c3/A-ckcZ9H0Ee1n_ls2FM41.png description: | Trained on Llama 3 8B using a modified version of jerryjalapeno/nart-100k-synthetic. It is a Llama 3 version of https://huggingface.co/victunes/TherapyBeagle-11B-v2 - TherapyLlama is hopefully aligned to be helpful, healthy, and comforting. + TherapyLlama is hopefully aligned to be helpful, healthy, and comforting. Usage: Do not hold back on Buddy. Open up to Buddy. @@ -243,56 +243,56 @@ parameters: model: TherapyLlama-8B-v1-Q4_K_M.gguf files: - - filename: TherapyLlama-8B-v1-Q4_K_M.gguf - sha256: 3d5a16d458e074a7bc7e706a493d8e95e8a7b2cb16934c851aece0af9d1da14a - uri: huggingface://victunes/TherapyLlama-8B-v1-GGUF/TherapyLlama-8B-v1-Q4_K_M.gguf + - filename: TherapyLlama-8B-v1-Q4_K_M.gguf + sha256: 3d5a16d458e074a7bc7e706a493d8e95e8a7b2cb16934c851aece0af9d1da14a + uri: huggingface://victunes/TherapyLlama-8B-v1-GGUF/TherapyLlama-8B-v1-Q4_K_M.gguf - <<: *llama3 name: "aura-uncensored-l3-8b-iq-imatrix" urls: - - https://huggingface.co/Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix + - https://huggingface.co/Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/oiYHWIEHqmgUkY0GsVdDx.png description: | - This is another better atempt at a less censored Llama-3 with hopefully more stable formatting. + This is another better atempt at a less censored Llama-3 with hopefully more stable formatting. 
overrides: parameters: model: Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf files: - - filename: Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf - sha256: 265ded6a4f439bec160f394e3083a4a20e32ebb9d1d2d85196aaab23dab87fb2 - uri: huggingface://Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix/Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf + - filename: Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf + sha256: 265ded6a4f439bec160f394e3083a4a20e32ebb9d1d2d85196aaab23dab87fb2 + uri: huggingface://Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix/Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf - &dolphin name: "dolphin-2.9-llama3-8b" url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" urls: - - https://huggingface.co/cognitivecomputations/dolphin-2.9-llama3-8b-gguf + - https://huggingface.co/cognitivecomputations/dolphin-2.9-llama3-8b-gguf tags: - - llm - - gguf - - gpu - - cpu - - llama3 + - llm + - gguf + - gpu + - cpu + - llama3 license: llama3 description: | Dolphin-2.9 has a variety of instruction, conversational, and coding skills. It also has initial agentic abilities and supports function calling. - Dolphin is uncensored. + Dolphin is uncensored. Curated and trained by Eric Hartford, Lucas Atkins, and Fernando Fernandes, and Cognitive Computations icon: https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/ldkN1J0WIDQwU4vutGYiD.png overrides: parameters: model: dolphin-2.9-llama3-8b-q4_K_M.gguf files: - - filename: dolphin-2.9-llama3-8b-q4_K_M.gguf - sha256: be988199ce28458e97205b11ae9d9cf4e3d8e18ff4c784e75bfc12f54407f1a1 - uri: huggingface://cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q4_K_M.gguf + - filename: dolphin-2.9-llama3-8b-q4_K_M.gguf + sha256: be988199ce28458e97205b11ae9d9cf4e3d8e18ff4c784e75bfc12f54407f1a1 + uri: huggingface://cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q4_K_M.gguf - <<: *dolphin name: "dolphin-2.9-llama3-8b:Q6_K" overrides: parameters: model: dolphin-2.9-llama3-8b-q6_K.gguf files: - - filename: dolphin-2.9-llama3-8b-q6_K.gguf - sha256: 8aac72a0bd72c075ba7be1aa29945e47b07d39cd16be9a80933935f51b57fb32 - uri: huggingface://cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q6_K.gguf + - filename: dolphin-2.9-llama3-8b-q6_K.gguf + sha256: 8aac72a0bd72c075ba7be1aa29945e47b07d39cd16be9a80933935f51b57fb32 + uri: huggingface://cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q6_K.gguf ## LLama2 and derivatives ### Start Fimbulvetr - &vicuna-chat @@ -304,20 +304,20 @@ description: | Cute girl to catch your attention. 
urls: - - https://huggingface.co/Sao10K/Fimbulvetr-11B-v2-GGUF + - https://huggingface.co/Sao10K/Fimbulvetr-11B-v2-GGUF tags: - - llm - - gguf - - gpu - - cpu - - llama3 + - llm + - gguf + - gpu + - cpu + - llama3 overrides: parameters: model: Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf files: - - filename: Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf - sha256: 3597dacfb0ab717d565d8a4d6067f10dcb0e26cc7f21c832af1a10a87882a8fd - uri: huggingface://Sao10K/Fimbulvetr-11B-v2-GGUF/Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf + - filename: Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf + sha256: 3597dacfb0ab717d565d8a4d6067f10dcb0e26cc7f21c832af1a10a87882a8fd + uri: huggingface://Sao10K/Fimbulvetr-11B-v2-GGUF/Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf ### Start noromaid - &noromaid url: "github:mudler/LocalAI/gallery/noromaid.yaml@master" @@ -325,48 +325,48 @@ icon: https://cdn-uploads.huggingface.co/production/uploads/630dfb008df86f1e5becadc3/VKX2Z2yjZX5J8kXzgeCYO.png license: cc-by-nc-4.0 urls: - - https://huggingface.co/NeverSleep/Noromaid-13B-0.4-DPO-GGUF + - https://huggingface.co/NeverSleep/Noromaid-13B-0.4-DPO-GGUF tags: - - llm - - llama2 - - gguf - - gpu - - cpu + - llm + - llama2 + - gguf + - gpu + - cpu overrides: parameters: model: Noromaid-13B-0.4-DPO.q4_k_m.gguf files: - - filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf - sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1 - uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf + - filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf + sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1 + uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf ### START LLaVa - &llava url: "github:mudler/LocalAI/gallery/llava.yaml@master" license: apache-2.0 description: | - LLaVA represents a novel end-to-end trained large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding, achieving impressive chat capabilities mimicking spirits of the multimodal GPT-4 and setting a new state-of-the-art accuracy on Science QA. + LLaVA represents a novel end-to-end trained large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding, achieving impressive chat capabilities mimicking spirits of the multimodal GPT-4 and setting a new state-of-the-art accuracy on Science QA. 
urls: - - https://llava-vl.github.io/ + - https://llava-vl.github.io/ tags: - - llm - - multimodal - - gguf - - gpu - - llama2 - - cpu + - llm + - multimodal + - gguf + - gpu + - llama2 + - cpu name: "llava-1.6-vicuna" overrides: mmproj: mmproj-vicuna7b-f16.gguf parameters: model: vicuna-7b-q5_k.gguf files: - - filename: vicuna-7b-q5_k.gguf - uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf - - filename: mmproj-vicuna7b-f16.gguf - uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf + - filename: vicuna-7b-q5_k.gguf + uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf + - filename: mmproj-vicuna7b-f16.gguf + uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf - <<: *llava name: "llava-1.6-mistral" overrides: @@ -374,12 +374,12 @@ parameters: model: llava-v1.6-mistral-7b.gguf files: - - filename: llava-v1.6-mistral-7b.gguf - sha256: 31826170ffa2e8080bbcd74cac718f906484fd5a59895550ef94c1baa4997595 - uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q6_K.gguf - - filename: llava-v1.6-7b-mmproj-f16.gguf - sha256: 00205ee8a0d7a381900cd031e43105f86aa0d8c07bf329851e85c71a26632d16 - uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf + - filename: llava-v1.6-mistral-7b.gguf + sha256: 31826170ffa2e8080bbcd74cac718f906484fd5a59895550ef94c1baa4997595 + uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q6_K.gguf + - filename: llava-v1.6-7b-mmproj-f16.gguf + sha256: 00205ee8a0d7a381900cd031e43105f86aa0d8c07bf329851e85c71a26632d16 + uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf - <<: *llava name: "llava-1.5" overrides: @@ -387,12 +387,12 @@ parameters: model: llava-v1.5-7b-Q4_K.gguf files: - - filename: llava-v1.5-7b-Q4_K.gguf - sha256: c91ebf0a628ceb25e374df23ad966cc1bf1514b33fecf4f0073f9619dec5b3f9 - uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-Q4_K.gguf - - filename: llava-v1.5-7b-mmproj-Q8_0.gguf - sha256: 09c230de47f6f843e4841656f7895cac52c6e7ec7392acb5e8527de8b775c45a - uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf + - filename: llava-v1.5-7b-Q4_K.gguf + sha256: c91ebf0a628ceb25e374df23ad966cc1bf1514b33fecf4f0073f9619dec5b3f9 + uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-Q4_K.gguf + - filename: llava-v1.5-7b-mmproj-Q8_0.gguf + sha256: 09c230de47f6f843e4841656f7895cac52c6e7ec7392acb5e8527de8b775c45a + uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf ### START Phi-2 - &phi-2-chat url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master" @@ -416,32 +416,32 @@ Finetuned from model: Phi-2 urls: - - https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml - - https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml-gguf + - https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml + - https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml-gguf tags: - - llm - - gguf - - gpu - - llama2 - - cpu + - llm + - gguf + - gpu + - llama2 + - cpu name: "phi-2-chat:Q8_0" overrides: parameters: model: phi-2-layla-v1-chatml-Q8_0.gguf files: - - filename: "phi-2-layla-v1-chatml-Q8_0.gguf" - sha256: "0cf542a127c2c835066a78028009b7eddbaf773cc2a26e1cb157ce5e09c1a2e0" - uri: "huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf" + - filename: "phi-2-layla-v1-chatml-Q8_0.gguf" + sha256: "0cf542a127c2c835066a78028009b7eddbaf773cc2a26e1cb157ce5e09c1a2e0" + uri: 
"huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf" - <<: *phi-2-chat name: "phi-2-chat" overrides: parameters: model: phi-2-layla-v1-chatml-Q4_K.gguf files: - - filename: "phi-2-layla-v1-chatml-Q4_K.gguf" - sha256: "b071e5624b60b8911f77261398802c4b4079c6c689e38e2ce75173ed62bc8a48" - uri: "huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q4_K.gguf" + - filename: "phi-2-layla-v1-chatml-Q4_K.gguf" + sha256: "b071e5624b60b8911f77261398802c4b4079c6c689e38e2ce75173ed62bc8a48" + uri: "huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q4_K.gguf" - <<: *phi-2-chat license: mit icon: "https://huggingface.co/rhysjones/phi-2-orange/resolve/main/phi-2-orange.jpg" @@ -450,23 +450,23 @@ There is an updated model at rhysjones/phi-2-orange-v2 which has higher evals, if you wish to test. urls: - - https://huggingface.co/rhysjones/phi-2-orange - - https://huggingface.co/TheBloke/phi-2-orange-GGUF + - https://huggingface.co/rhysjones/phi-2-orange + - https://huggingface.co/TheBloke/phi-2-orange-GGUF tags: - - llm - - gguf - - llama2 - - gpu - - cpu + - llm + - gguf + - llama2 + - gpu + - cpu name: "phi-2-orange" overrides: parameters: model: phi-2-orange.Q4_0.gguf files: - - filename: "phi-2-orange.Q4_0.gguf" - sha256: "49cb710ae688e1b19b1b299087fa40765a0cd677e3afcc45e5f7ef6750975dcf" - uri: "huggingface://TheBloke/phi-2-orange-GGUF/phi-2-orange.Q4_0.gguf" + - filename: "phi-2-orange.Q4_0.gguf" + sha256: "49cb710ae688e1b19b1b299087fa40765a0cd677e3afcc45e5f7ef6750975dcf" + uri: "huggingface://TheBloke/phi-2-orange-GGUF/phi-2-orange.Q4_0.gguf" ### START Phi-3 - &phi-3 url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master" @@ -477,30 +477,30 @@ The Phi-3-Mini-4K-Instruct is a 3.8B parameters, lightweight, state-of-the-art open model trained with the Phi-3 datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Phi-3 family with the Mini version in two variants 4K and 128K which is the context length (in tokens) it can support. The model has underwent a post-training process that incorporates both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks testing common sense, language understanding, math, code, long context and logical reasoning, Phi-3 Mini-4K-Instruct showcased a robust and state-of-the-art performance among models with less than 13 billion parameters. 
urls: - - https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf + - https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf tags: - - llm - - gguf - - gpu - - llama2 - - cpu + - llm + - gguf + - gpu + - llama2 + - cpu overrides: parameters: model: Phi-3-mini-4k-instruct-q4.gguf files: - - filename: "Phi-3-mini-4k-instruct-q4.gguf" - sha256: "4fed7364ee3e0c7cb4fe0880148bfdfcd1b630981efa0802a6b62ee52e7da97e" - uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf" + - filename: "Phi-3-mini-4k-instruct-q4.gguf" + sha256: "4fed7364ee3e0c7cb4fe0880148bfdfcd1b630981efa0802a6b62ee52e7da97e" + uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf" - <<: *phi-3 name: "phi-3-mini-4k-instruct:fp16" overrides: parameters: model: Phi-3-mini-4k-instruct-fp16.gguf files: - - filename: "Phi-3-mini-4k-instruct-fp16.gguf" - sha256: "ad9f8ff11cd096115adc8ff50befa22fc3da2718672ddd2ab30faccd70488605" - uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf" + - filename: "Phi-3-mini-4k-instruct-fp16.gguf" + sha256: "ad9f8ff11cd096115adc8ff50befa22fc3da2718672ddd2ab30faccd70488605" + uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf" ### START Hermes-2-Pro-Mistral - &hermes-2-pro-mistral url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" @@ -520,39 +520,39 @@ Learn more about the function calling on our github repo here: https://github.com/NousResearch/Hermes-Function-Calling/tree/main urls: - - https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF + - https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF tags: - - llm - - gguf - - gpu - - llama2 - - cpu + - llm + - gguf + - gpu + - llama2 + - cpu overrides: parameters: model: Hermes-2-Pro-Mistral-7B.Q4_0.gguf files: - - filename: "Hermes-2-Pro-Mistral-7B.Q4_0.gguf" - sha256: "f446c3125026f7af6757dd097dda02280adc85e908c058bd6f1c41a118354745" - uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q4_0.gguf" + - filename: "Hermes-2-Pro-Mistral-7B.Q4_0.gguf" + sha256: "f446c3125026f7af6757dd097dda02280adc85e908c058bd6f1c41a118354745" + uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q4_0.gguf" - <<: *hermes-2-pro-mistral name: "hermes-2-pro-mistral:Q6_K" overrides: parameters: model: Hermes-2-Pro-Mistral-7B.Q6_K.gguf files: - - filename: "Hermes-2-Pro-Mistral-7B.Q6_K.gguf" - sha256: "40adc3b227bc36764de148fdda4df5df385adc06650d58d4dbe726ee0214eeff" - uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf" + - filename: "Hermes-2-Pro-Mistral-7B.Q6_K.gguf" + sha256: "40adc3b227bc36764de148fdda4df5df385adc06650d58d4dbe726ee0214eeff" + uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf" - <<: *hermes-2-pro-mistral name: "hermes-2-pro-mistral:Q8_0" overrides: parameters: model: Hermes-2-Pro-Mistral-7B.Q8_0.gguf files: - - filename: "Hermes-2-Pro-Mistral-7B.Q8_0.gguf" - sha256: "b6d95d7ec9a395b7568cc94b0447fd4f90b6f69d6e44794b1fbb84e3f732baca" - uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q8_0.gguf" + - filename: "Hermes-2-Pro-Mistral-7B.Q8_0.gguf" + sha256: "b6d95d7ec9a395b7568cc94b0447fd4f90b6f69d6e44794b1fbb84e3f732baca" + uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q8_0.gguf" ### END Hermes-2-Pro-Mistral ### START Cerbero - url: 
"github:mudler/LocalAI/gallery/cerbero.yaml@master" @@ -560,20 +560,20 @@ description: | cerbero-7b is specifically crafted to fill the void in Italy's AI landscape. urls: - - https://huggingface.co/galatolo/cerbero-7b + - https://huggingface.co/galatolo/cerbero-7b tags: - - llm - - gguf - - gpu - - cpu - - mistral + - llm + - gguf + - gpu + - cpu + - mistral overrides: parameters: model: galatolo-Q4_K.gguf files: - - filename: "galatolo-Q4_K.gguf" - sha256: "ca0cfd5a9ad40dc16416aa3a277015d0299b62c0803b67f5709580042202c172" - uri: "huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q4_K.gguf" + - filename: "galatolo-Q4_K.gguf" + sha256: "ca0cfd5a9ad40dc16416aa3a277015d0299b62c0803b67f5709580042202c172" + uri: "huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q4_K.gguf" ### START Codellama - &codellama url: "github:mudler/LocalAI/gallery/codellama.yaml@master" @@ -584,34 +584,34 @@ Code Llama is a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 34 billion parameters. This model is designed for general code synthesis and understanding. urls: - - https://huggingface.co/TheBloke/CodeLlama-7B-GGUF - - https://huggingface.co/meta-llama/CodeLlama-7b-hf + - https://huggingface.co/TheBloke/CodeLlama-7B-GGUF + - https://huggingface.co/meta-llama/CodeLlama-7b-hf tags: - - llm - - gguf - - gpu - - llama2 - - cpu + - llm + - gguf + - gpu + - llama2 + - cpu overrides: parameters: model: codellama-7b.Q4_0.gguf files: - - filename: "codellama-7b.Q4_0.gguf" - sha256: "33052f6dd41436db2f83bd48017b6fff8ce0184e15a8a227368b4230f1da97b5" - uri: "huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_0.gguf" + - filename: "codellama-7b.Q4_0.gguf" + sha256: "33052f6dd41436db2f83bd48017b6fff8ce0184e15a8a227368b4230f1da97b5" + uri: "huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_0.gguf" ### START Embeddings - &sentencentransformers description: | This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar text are closer and can efficiently be found using cosine similarity. urls: - - https://github.com/UKPLab/sentence-transformers + - https://github.com/UKPLab/sentence-transformers tags: - - gpu - - cpu - - embeddings - - python + - gpu + - cpu + - embeddings + - python name: "all-MiniLM-L6-v2" url: "github:mudler/LocalAI/gallery/sentencetransformers.yaml@master" overrides: @@ -628,42 +628,42 @@ A text-to-image model that uses Stable Diffusion 1.5 to generate images from text prompts. This model is DreamShaper model by Lykon. 
urls: - - https://civitai.com/models/4384/dreamshaper + - https://civitai.com/models/4384/dreamshaper tags: - - text-to-image - - stablediffusion - - python - - sd-1.5 - - gpu + - text-to-image + - stablediffusion + - python + - sd-1.5 + - gpu url: "github:mudler/LocalAI/gallery/dreamshaper.yaml@master" overrides: parameters: model: DreamShaper_8_pruned.safetensors files: - - filename: DreamShaper_8_pruned.safetensors - uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors - sha256: 879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd + - filename: DreamShaper_8_pruned.safetensors + uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors + sha256: 879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd ## Whisper - url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" name: "whisper-1" license: "MIT" urls: - - https://github.com/ggerganov/whisper.cpp - - https://huggingface.co/ggerganov/whisper.cpp + - https://github.com/ggerganov/whisper.cpp + - https://huggingface.co/ggerganov/whisper.cpp description: | Port of OpenAI's Whisper model in C/C++ - + ## Bert embeddings - url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master" name: "bert-embeddings" license: "Apache 2.0" urls: - - https://huggingface.co/skeskinen/ggml + - https://huggingface.co/skeskinen/ggml tags: - - embeddings + - embeddings description: | Bert model that can be used for embeddings @@ -671,13 +671,13 @@ - url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master license: "BSD-3" urls: - - https://github.com/EdVince/Stable-Diffusion-NCNN - - https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE + - https://github.com/EdVince/Stable-Diffusion-NCNN + - https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE description: | - Stable Diffusion in NCNN with c++, supported txt2img and img2img + Stable Diffusion in NCNN with c++, supported txt2img and img2img name: stablediffusion-cpp - + ## Tiny Dream - url: github:mudler/LocalAI/gallery/tinydream.yaml@master name: tinydream @@ -702,9 +702,9 @@ A fast, local neural text to speech system that sounds great and is optimized for the Raspberry Pi 4. Piper is used in a variety of [projects](https://github.com/rhasspy/piper#people-using-piper). 
tags: - - tts - - text-to-speech - - cpu + - tts + - text-to-speech + - cpu override: parameters: @@ -786,7 +786,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-thorsten-low - + override: parameters: model: de-thorsten-low.onnx @@ -796,7 +796,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-el-gr-rapunzelina-low - + override: parameters: model: el-gr-rapunzelina-low.onnx @@ -806,7 +806,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-gb-alan-low - + override: parameters: model: en-gb-alan-low.onnx @@ -816,7 +816,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-gb-southern_english_female-low - + override: parameters: model: en-gb-southern_english @@ -826,7 +826,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-amy-low - + override: parameters: model: en-us-amy-low.onnx @@ -836,7 +836,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-danny-low - + override: parameters: model: en-us-danny-low.onnx @@ -846,7 +846,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-kathleen-low - + override: parameters: model: en-us-kathleen-low.onnx @@ -856,7 +856,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-lessac-low - + override: parameters: model: en-us-lessac-low.onnx @@ -866,7 +866,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-lessac-medium - + override: parameters: model: en-us-lessac-medium.onnx @@ -876,7 +876,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-libritts-high - + override: parameters: model: en-us-libritts-high.onnx @@ -886,7 +886,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-ryan-high - + override: parameters: model: en-us-ryan-high.onnx @@ -896,7 +896,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-ryan-low - + override: parameters: model: en-us-ryan-low.onnx @@ -907,7 +907,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-ryan-medium - + override: parameters: model: en-us-ryan-medium.onnx @@ -938,7 +938,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-es-mls_10246-low - + override: parameters: model: es-mls_10246-low.onnx @@ -949,7 +949,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-es-mls_9972-low - + override: parameters: model: es-mls_9972-low.onnx @@ -960,7 +960,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fi-harri-low - + override: parameters: model: fi-harri-low.onnx @@ -971,7 +971,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-gilles-low - + override: parameters: model: fr-gilles-low.onnx @@ -982,7 +982,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-mls_1840-low - + override: parameters: model: fr-mls_1840-low.onnx @@ -993,7 +993,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-siwis-low - + override: parameters: model: fr-siwis-low.onnx @@ -1004,7 +1004,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-siwis-medium - + override: parameters: model: fr-siwis-medium.onnx @@ -1015,7 +1015,7 @@ - <<: *piper url: 
github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-bui-medium - + override: parameters: model: is-bui-medium.onnx @@ -1026,7 +1026,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-salka-medium - + override: parameters: model: is-salka-medium.onnx @@ -1037,7 +1037,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-steinn-medium - + override: parameters: model: is-steinn-medium.onnx @@ -1048,7 +1048,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-ugla-medium - + override: parameters: model: is-ugla-medium.onnx @@ -1059,7 +1059,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-it-riccardo_fasol-x-low - + override: parameters: model: it-riccardo_fasol-x-low.onnx @@ -1070,7 +1070,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-kk-iseke-x-low - + override: parameters: model: kk-iseke-x-low.onnx @@ -1081,7 +1081,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-kk-issai-high - + override: parameters: model: kk-issai-high.onnx @@ -1092,7 +1092,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-kk-raya-x-low - + override: parameters: model: kk-raya-x-low.onnx @@ -1103,7 +1103,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ne-google-medium - + override: parameters: model: ne-google-medium.onnx @@ -1114,7 +1114,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ne-google-x-low - + override: parameters: model: ne-google-x-low.onnx @@ -1125,7 +1125,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-mls_5809-low - + override: parameters: model: nl-mls_5809-low.onnx @@ -1136,7 +1136,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-mls_7432-low - + override: parameters: model: nl-mls_7432-low.onnx @@ -1147,7 +1147,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-nathalie-x-low - + override: parameters: model: nl-nathalie-x-low.onnx @@ -1158,7 +1158,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-rdh-medium - + override: parameters: model: nl-rdh-medium.onnx @@ -1169,7 +1169,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-rdh-x-low - + override: parameters: model: nl-rdh-x-low.onnx @@ -1180,7 +1180,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-no-talesyntese-medium - + override: parameters: model: no-talesyntese-medium.onnx @@ -1191,7 +1191,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-pl-mls_6892-low - + override: parameters: model: pl-mls_6892-low.onnx @@ -1202,7 +1202,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-pt-br-edresson-low - + override: parameters: model: pt-br-edresson-low.onnx @@ -1213,7 +1213,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ru-irinia-medium - + override: parameters: model: ru-irinia-medium.onnx @@ -1224,7 +1224,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-sv-se-nst-medium - + override: parameters: model: sv-se-nst-medium.onnx @@ -1235,7 +1235,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-uk-lada-x-low - + override: parameters: model: uk-lada-x-low.onnx @@ 
-1246,7 +1246,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-vi-25hours-single-low - + override: parameters: model: vi-25hours-single-low.onnx @@ -1257,7 +1257,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-vi-vivos-x-low - + override: parameters: model: vi-vivos-x-low.onnx @@ -1268,7 +1268,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-zh-cn-huayan-x-low - + override: parameters: model: zh-cn-huayan-x-low.onnx @@ -1279,10 +1279,10 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-zh_CN-huayan-medium - + override: parameters: model: zh_CN-huayan-medium.onnx files: - filename: voice-zh_CN-huayan-medium.tar.gz - uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh_CN-huayan-medium.tar.gz \ No newline at end of file + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh_CN-huayan-medium.tar.gz diff --git a/gallery/llama3-instruct.yaml b/gallery/llama3-instruct.yaml index 96272c58..f6016cbd 100644 --- a/gallery/llama3-instruct.yaml +++ b/gallery/llama3-instruct.yaml @@ -1,6 +1,6 @@ +--- name: "llama3-instruct" - config_file: | mmap: true template: diff --git a/gallery/llava.yaml b/gallery/llava.yaml index 44c1aa97..4d07847a 100644 --- a/gallery/llava.yaml +++ b/gallery/llava.yaml @@ -1,6 +1,6 @@ +--- name: "llava" - config_file: | backend: llama-cpp context_size: 4096 diff --git a/gallery/noromaid.yaml b/gallery/noromaid.yaml index 0b9badfe..4772e4ec 100644 --- a/gallery/noromaid.yaml +++ b/gallery/noromaid.yaml @@ -1,3 +1,4 @@ +--- config_file: | mmap: true backend: llama-cpp @@ -50,4 +51,3 @@ config_file: | - - "\n" - "\n\n\n" - diff --git a/gallery/parler-tts.yaml b/gallery/parler-tts.yaml index 76252b1d..98d4614b 100644 --- a/gallery/parler-tts.yaml +++ b/gallery/parler-tts.yaml @@ -1,2 +1,3 @@ +--- config_file: | backend: parler-tts diff --git a/gallery/phi-2-chat.yaml b/gallery/phi-2-chat.yaml index 3fc84d3b..5e1fb702 100644 --- a/gallery/phi-2-chat.yaml +++ b/gallery/phi-2-chat.yaml @@ -1,6 +1,6 @@ +--- name: "phi-2-chatml" - config_file: | mmap: true template: @@ -16,4 +16,3 @@ config_file: | f16: true stopwords: - <|im_end|> - diff --git a/gallery/phi-2-orange.yaml b/gallery/phi-2-orange.yaml index 645875ad..89971b4d 100644 --- a/gallery/phi-2-orange.yaml +++ b/gallery/phi-2-orange.yaml @@ -1,3 +1,4 @@ +--- name: "phi-2-orange" config_file: | diff --git a/gallery/phi-3-chat.yaml b/gallery/phi-3-chat.yaml index b17e5bb4..ede4fd0f 100644 --- a/gallery/phi-3-chat.yaml +++ b/gallery/phi-3-chat.yaml @@ -1,3 +1,4 @@ +--- name: "phi-3-chat" config_file: | @@ -15,4 +16,3 @@ config_file: | f16: true stopwords: - <|end|> - diff --git a/gallery/piper.yaml b/gallery/piper.yaml index eb1a6ecc..c7f40f8c 100644 --- a/gallery/piper.yaml +++ b/gallery/piper.yaml @@ -1,2 +1,3 @@ +--- config_file: | backend: piper diff --git a/gallery/rerankers.yaml b/gallery/rerankers.yaml index dbbad5a0..a4ac48ca 100644 --- a/gallery/rerankers.yaml +++ b/gallery/rerankers.yaml @@ -1,2 +1,3 @@ +--- config_file: | - backend: rerankers \ No newline at end of file + backend: rerankers diff --git a/gallery/sentencetransformers.yaml b/gallery/sentencetransformers.yaml index 9ba5d29b..e8ba7aa0 100644 --- a/gallery/sentencetransformers.yaml +++ b/gallery/sentencetransformers.yaml @@ -1,4 +1,5 @@ +--- name: "sentencetransformers" config_file: | - backend: sentencetransformers \ No newline at end of file + backend: sentencetransformers diff --git 
a/gallery/stablediffusion.yaml b/gallery/stablediffusion.yaml index 9b1cad32..f84de7f2 100644 --- a/gallery/stablediffusion.yaml +++ b/gallery/stablediffusion.yaml @@ -1,3 +1,4 @@ +--- name: "stablediffusion-cpp" config_file: | @@ -7,42 +8,42 @@ config_file: | model: stablediffusion_assets files: -- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param" - sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param" -- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param" - sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param" -- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param" - sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param" -- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin" - sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin" -- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin" - sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin" -- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin" - sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin" -- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param" - sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param" -- filename: "stablediffusion_assets/log_sigmas.bin" - sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin" -- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param" - sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param" -- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param" - sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param" -- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param" - sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param" -- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin" - sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin" -- filename: "stablediffusion_assets/vocab.txt" - sha256: 
"e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt" \ No newline at end of file + - filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param" + sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param" + - filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param" + sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param" + - filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param" + sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param" + - filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin" + sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin" + - filename: "stablediffusion_assets/AutoencoderKL-fp16.bin" + sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin" + - filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin" + sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin" + - filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param" + sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param" + - filename: "stablediffusion_assets/log_sigmas.bin" + sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin" + - filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param" + sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param" + - filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param" + sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param" + - filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param" + sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param" + - filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin" + sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin" + - filename: "stablediffusion_assets/vocab.txt" + sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" + uri: 
"https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt" diff --git a/gallery/tinydream.yaml b/gallery/tinydream.yaml index 6e39414c..e4a79ad7 100644 --- a/gallery/tinydream.yaml +++ b/gallery/tinydream.yaml @@ -1,3 +1,4 @@ +--- name: "tinydream" config_file: | @@ -33,4 +34,4 @@ files: uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.param" - filename: "tinydream_assets/vocab.txt" sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" - uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/vocab.txt" \ No newline at end of file + uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/vocab.txt" diff --git a/gallery/vicuna-chat.yaml b/gallery/vicuna-chat.yaml index 9669cce0..05600e66 100644 --- a/gallery/vicuna-chat.yaml +++ b/gallery/vicuna-chat.yaml @@ -1,3 +1,4 @@ +--- name: "vicuna-chat" description: | @@ -18,4 +19,4 @@ config_file: | Complete the following sentence: {{.Input}} chat: | {{.Input}} - ASSISTANT: \ No newline at end of file + ASSISTANT: diff --git a/gallery/virtual.yaml b/gallery/virtual.yaml index 054c3257..22e3e546 100644 --- a/gallery/virtual.yaml +++ b/gallery/virtual.yaml @@ -1,6 +1,7 @@ +--- name: "virtual" description: | A Base model definition -license: "N/A" \ No newline at end of file +license: "N/A" diff --git a/gallery/whisper-base.yaml b/gallery/whisper-base.yaml index f654a37c..2dc24d6e 100644 --- a/gallery/whisper-base.yaml +++ b/gallery/whisper-base.yaml @@ -1,12 +1,12 @@ +--- name: "whisper-base" - config_file: | backend: whisper parameters: model: ggml-whisper-base.bin files: -- filename: "ggml-whisper-base.bin" - sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" - uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" \ No newline at end of file + - filename: "ggml-whisper-base.bin" + sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" + uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" From d98063e80e3bb4685ae681ea443992ba65f8acbc Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Thu, 25 Apr 2024 15:06:22 -0500 Subject: [PATCH 0377/2895] fix: api key polling was not using correct filepath (#2132) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- core/startup/config_file_watcher.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/core/startup/config_file_watcher.go b/core/startup/config_file_watcher.go index 5f6834d4..800059d0 100644 --- a/core/startup/config_file_watcher.go +++ b/core/startup/config_file_watcher.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "path" + "path/filepath" "time" "github.com/fsnotify/fsnotify" @@ -50,6 +51,7 @@ func (c *configFileHandler) Register(filename string, handler fileHandler, runNo } func (c *configFileHandler) callHandler(filename string, handler fileHandler) { + log.Trace().Str("filename", filename).Msg("reading file for dynamic config update") fileContent, err := os.ReadFile(filename) if err != nil && !os.IsNotExist(err) { log.Error().Err(err).Str("filename", filename).Msg("could not read file") @@ -75,7 +77,7 @@ func (c *configFileHandler) Watch() error { <-ticker.C for file, handler := range c.handlers { log.Debug().Str("file", file).Msg("polling config file") - c.callHandler(file, handler) + c.callHandler(filepath.Join(c.appConfig.DynamicConfigsDir, file), handler) } } }() @@ -122,7 +124,8 @@ func (c 
*configFileHandler) Stop() { func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler { handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error { - log.Debug().Msg("processing api_keys.json") + log.Debug().Msg("processing api keys runtime update") + log.Trace().Int("numKeys", len(startupAppConfig.ApiKeys)).Msg("api keys provided at startup") if len(fileContent) > 0 { // Parse JSON content from the file @@ -132,11 +135,14 @@ func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler { return err } + log.Trace().Int("numKeys", len(fileKeys)).Msg("discovered API keys from api keys dynamic config file") + appConfig.ApiKeys = append(startupAppConfig.ApiKeys, fileKeys...) } else { + log.Trace().Msg("no API keys discovered from dynamic config file") appConfig.ApiKeys = startupAppConfig.ApiKeys } - log.Debug().Msg("api keys loaded from api_keys.json") + log.Trace().Int("numKeys", len(appConfig.ApiKeys)).Msg("total api keys after processing") return nil } From 5fceb876c4a786e76ab1dfc42e1fe6c7a95c1eee Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 25 Apr 2024 23:40:41 +0200 Subject: [PATCH 0378/2895] :arrow_up: Update ggerganov/llama.cpp (#2133) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b017982e..eae8410a 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=784e11dea1f5ce9638851b2b0dddb107e2a609c8 +CPPLLAMA_VERSION?=46e12c4692a37bdd31a0432fc5153d7d22bc7f72 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 365ef92530d1d24479535e5a232986ad6a764cf5 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 25 Apr 2024 23:41:38 +0200 Subject: [PATCH 0379/2895] :arrow_up: Update mudler/go-stable-diffusion (#2134) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index eae8410a..7d64ad03 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759 # stablediffusion version -STABLEDIFFUSION_VERSION?=362df9da29f882dbf09ade61972d16a1f53c3485 +STABLEDIFFUSION_VERSION?=433ea6d9b64d9d08067324a757ef07040ea29568 # tinydream version TINYDREAM_VERSION?=22a12a4bc0ac5455856f28f3b771331a551a4293 From c8dd8e5ef492900453387a7cbc3da3b508c45715 Mon Sep 17 00:00:00 2001 From: Dave Date: Thu, 25 Apr 2024 18:47:06 -0400 Subject: [PATCH 0380/2895] fix: reduce chmod permissions for created files and directories (#2137) quiet more security scanner issues: pass one of chmod restrictions, removing group and other permissions Signed-off-by: Dave Lee --- core/backend/options.go | 2 +- core/backend/tts.go | 2 +- core/http/app.go | 10 +++++----- core/http/app_test.go | 6 +++--- core/http/endpoints/openai/assistant_test.go | 13 +++++++------ core/http/endpoints/openai/files_test.go | 2 +- core/startup/startup.go | 8 ++++---- pkg/assets/extract.go | 6 +++--- pkg/downloader/uri.go | 2 +- pkg/gallery/models.go
| 8 ++++---- pkg/gallery/models_test.go | 2 +- pkg/model/process.go | 2 +- pkg/templates/cache_test.go | 4 ++-- pkg/utils/config.go | 2 +- tests/integration/stores_test.go | 2 +- 15 files changed, 36 insertions(+), 35 deletions(-) diff --git a/core/backend/options.go b/core/backend/options.go index 60cb01ff..bbb9990d 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -109,7 +109,7 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption promptCachePath := "" if c.PromptCachePath != "" { p := filepath.Join(modelPath, c.PromptCachePath) - os.MkdirAll(filepath.Dir(p), 0755) + os.MkdirAll(filepath.Dir(p), 0750) promptCachePath = p } diff --git a/core/backend/tts.go b/core/backend/tts.go index f97b6202..4532cf00 100644 --- a/core/backend/tts.go +++ b/core/backend/tts.go @@ -53,7 +53,7 @@ func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader, return "", nil, fmt.Errorf("could not load piper model") } - if err := os.MkdirAll(appConfig.AudioDir, 0755); err != nil { + if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil { return "", nil, fmt.Errorf("failed creating audio directory: %s", err) } diff --git a/core/http/app.go b/core/http/app.go index 93eb0e20..bd740410 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -175,11 +175,11 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi } // Make sure directories exist - os.MkdirAll(appConfig.ImageDir, 0755) - os.MkdirAll(appConfig.AudioDir, 0755) - os.MkdirAll(appConfig.UploadDir, 0755) - os.MkdirAll(appConfig.ConfigsDir, 0755) - os.MkdirAll(appConfig.ModelPath, 0755) + os.MkdirAll(appConfig.ImageDir, 0750) + os.MkdirAll(appConfig.AudioDir, 0750) + os.MkdirAll(appConfig.UploadDir, 0750) + os.MkdirAll(appConfig.ConfigsDir, 0750) + os.MkdirAll(appConfig.ModelPath, 0750) // Load config jsons utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles) diff --git a/core/http/app_test.go b/core/http/app_test.go index 3699c0ed..f4728770 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -222,7 +222,7 @@ var _ = Describe("API test", func() { modelDir = filepath.Join(tmpdir, "models") backendAssetsDir := filepath.Join(tmpdir, "backend-assets") - err = os.Mkdir(backendAssetsDir, 0755) + err = os.Mkdir(backendAssetsDir, 0750) Expect(err).ToNot(HaveOccurred()) c, cancel = context.WithCancel(context.Background()) @@ -241,7 +241,7 @@ var _ = Describe("API test", func() { } out, err := yaml.Marshal(g) Expect(err).ToNot(HaveOccurred()) - err = os.WriteFile(filepath.Join(tmpdir, "gallery_simple.yaml"), out, 0644) + err = os.WriteFile(filepath.Join(tmpdir, "gallery_simple.yaml"), out, 0600) Expect(err).ToNot(HaveOccurred()) galleries := []gallery.Gallery{ @@ -595,7 +595,7 @@ var _ = Describe("API test", func() { Expect(err).ToNot(HaveOccurred()) modelDir = filepath.Join(tmpdir, "models") backendAssetsDir := filepath.Join(tmpdir, "backend-assets") - err = os.Mkdir(backendAssetsDir, 0755) + err = os.Mkdir(backendAssetsDir, 0750) Expect(err).ToNot(HaveOccurred()) c, cancel = context.WithCancel(context.Background()) diff --git a/core/http/endpoints/openai/assistant_test.go b/core/http/endpoints/openai/assistant_test.go index bdc41dda..e7c09033 100644 --- a/core/http/endpoints/openai/assistant_test.go +++ b/core/http/endpoints/openai/assistant_test.go @@ -3,10 +3,6 @@ package openai import ( "encoding/json" "fmt" - "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/pkg/model" -
"github.com/gofiber/fiber/v2" - "github.com/stretchr/testify/assert" "io" "io/ioutil" "net/http" @@ -16,6 +12,11 @@ import ( "strings" "testing" "time" + + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" + "github.com/stretchr/testify/assert" ) var configsDir string = "/tmp/localai/configs" @@ -49,8 +50,8 @@ func TestAssistantEndpoints(t *testing.T) { } _ = os.RemoveAll(appConfig.ConfigsDir) - _ = os.MkdirAll(appConfig.ConfigsDir, 0755) - _ = os.MkdirAll(modelPath, 0755) + _ = os.MkdirAll(appConfig.ConfigsDir, 0750) + _ = os.MkdirAll(modelPath, 0750) os.Create(filepath.Join(modelPath, "ggml-gpt4all-j")) app := fiber.New(fiber.Config{ diff --git a/core/http/endpoints/openai/files_test.go b/core/http/endpoints/openai/files_test.go index fc77ae45..2d0be7b9 100644 --- a/core/http/endpoints/openai/files_test.go +++ b/core/http/endpoints/openai/files_test.go @@ -251,7 +251,7 @@ func newMultipartFile(filePath, tag, purpose string) (*strings.Reader, *multipar // Helper to create test files func createTestFile(t *testing.T, name string, sizeMB int, option *config.ApplicationConfig) *os.File { - err := os.MkdirAll(option.UploadDir, 0755) + err := os.MkdirAll(option.UploadDir, 0750) if err != nil { t.Fatalf("Error MKDIR: %v", err) diff --git a/core/startup/startup.go b/core/startup/startup.go index 97882a22..b9e95ebf 100644 --- a/core/startup/startup.go +++ b/core/startup/startup.go @@ -23,24 +23,24 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode if options.ModelPath == "" { return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty") } - err := os.MkdirAll(options.ModelPath, 0755) + err := os.MkdirAll(options.ModelPath, 0750) if err != nil { return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err) } if options.ImageDir != "" { - err := os.MkdirAll(options.ImageDir, 0755) + err := os.MkdirAll(options.ImageDir, 0750) if err != nil { return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err) } } if options.AudioDir != "" { - err := os.MkdirAll(options.AudioDir, 0755) + err := os.MkdirAll(options.AudioDir, 0750) if err != nil { return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err) } } if options.UploadDir != "" { - err := os.MkdirAll(options.UploadDir, 0755) + err := os.MkdirAll(options.UploadDir, 0750) if err != nil { return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err) } diff --git a/pkg/assets/extract.go b/pkg/assets/extract.go index fc208377..b795cb30 100644 --- a/pkg/assets/extract.go +++ b/pkg/assets/extract.go @@ -10,7 +10,7 @@ import ( func ExtractFiles(content embed.FS, extractDir string) error { // Create the target directory if it doesn't exist - err := os.MkdirAll(extractDir, 0755) + err := os.MkdirAll(extractDir, 0750) if err != nil { return fmt.Errorf("failed to create directory: %v", err) } @@ -25,7 +25,7 @@ func ExtractFiles(content embed.FS, extractDir string) error { targetFile := filepath.Join(extractDir, path) if d.IsDir() { // Create the directory in the target directory - err := os.MkdirAll(targetFile, 0755) + err := os.MkdirAll(targetFile, 0750) if err != nil { return fmt.Errorf("failed to create directory: %v", err) } @@ -39,7 +39,7 @@ func ExtractFiles(content embed.FS, extractDir string) error { } // Create the file in the target directory - err = os.WriteFile(targetFile, fileData, 0644) + err = os.WriteFile(targetFile, fileData, 0600) if err != nil { return fmt.Errorf("failed to write file: %v", err) } 
diff --git a/pkg/downloader/uri.go b/pkg/downloader/uri.go index 46ccd6a1..797a264b 100644 --- a/pkg/downloader/uri.go +++ b/pkg/downloader/uri.go @@ -184,7 +184,7 @@ func DownloadFile(url string, filePath, sha string, fileN, total int, downloadSt } // Create parent directory - err = os.MkdirAll(filepath.Dir(filePath), 0755) + err = os.MkdirAll(filepath.Dir(filePath), 0750) if err != nil { return fmt.Errorf("failed to create parent directory for file %q: %v", filePath, err) } diff --git a/pkg/gallery/models.go b/pkg/gallery/models.go index 59971bbc..2ab4c832 100644 --- a/pkg/gallery/models.go +++ b/pkg/gallery/models.go @@ -92,7 +92,7 @@ func ReadConfigFile(filePath string) (*Config, error) { func InstallModel(basePath, nameOverride string, config *Config, configOverrides map[string]interface{}, downloadStatus func(string, string, string, float64)) error { // Create base path if it doesn't exist - err := os.MkdirAll(basePath, 0755) + err := os.MkdirAll(basePath, 0750) if err != nil { return fmt.Errorf("failed to create base path: %v", err) } @@ -125,12 +125,12 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides filePath := filepath.Join(basePath, template.Name+".tmpl") // Create parent directory - err := os.MkdirAll(filepath.Dir(filePath), 0755) + err := os.MkdirAll(filepath.Dir(filePath), 0750) if err != nil { return fmt.Errorf("failed to create parent directory for prompt template %q: %v", template.Name, err) } // Create and write file content - err = os.WriteFile(filePath, []byte(template.Content), 0644) + err = os.WriteFile(filePath, []byte(template.Content), 0600) if err != nil { return fmt.Errorf("failed to write prompt template %q: %v", template.Name, err) } @@ -170,7 +170,7 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides return fmt.Errorf("failed to marshal updated config YAML: %v", err) } - err = os.WriteFile(configFilePath, updatedConfigYAML, 0644) + err = os.WriteFile(configFilePath, updatedConfigYAML, 0600) if err != nil { return fmt.Errorf("failed to write updated config file: %v", err) } diff --git a/pkg/gallery/models_test.go b/pkg/gallery/models_test.go index 21d3a03d..6eb63128 100644 --- a/pkg/gallery/models_test.go +++ b/pkg/gallery/models_test.go @@ -48,7 +48,7 @@ var _ = Describe("Model test", func() { }} out, err := yaml.Marshal(gallery) Expect(err).ToNot(HaveOccurred()) - err = os.WriteFile(filepath.Join(tempdir, "gallery_simple.yaml"), out, 0644) + err = os.WriteFile(filepath.Join(tempdir, "gallery_simple.yaml"), out, 0600) Expect(err).ToNot(HaveOccurred()) galleries := []Gallery{ diff --git a/pkg/model/process.go b/pkg/model/process.go index 5f63ee7f..08822fd9 100644 --- a/pkg/model/process.go +++ b/pkg/model/process.go @@ -65,7 +65,7 @@ func (ml *ModelLoader) GetGRPCPID(id string) (int, error) { func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string) error { // Make sure the process is executable - if err := os.Chmod(grpcProcess, 0755); err != nil { + if err := os.Chmod(grpcProcess, 0700); err != nil { return err } diff --git a/pkg/templates/cache_test.go b/pkg/templates/cache_test.go index 83af02b2..fc5941ed 100644 --- a/pkg/templates/cache_test.go +++ b/pkg/templates/cache_test.go @@ -21,9 +21,9 @@ var _ = Describe("TemplateCache", func() { Expect(err).NotTo(HaveOccurred()) // Writing example template files - err = os.WriteFile(filepath.Join(tempDir, "example.tmpl"), []byte("Hello, {{.Name}}!"), 0644) + err = os.WriteFile(filepath.Join(tempDir, "example.tmpl"), 
[]byte("Hello, {{.Name}}!"), 0600) Expect(err).NotTo(HaveOccurred()) - err = os.WriteFile(filepath.Join(tempDir, "empty.tmpl"), []byte(""), 0644) + err = os.WriteFile(filepath.Join(tempDir, "empty.tmpl"), []byte(""), 0600) Expect(err).NotTo(HaveOccurred()) templateCache = templates.NewTemplateCache(tempDir) diff --git a/pkg/utils/config.go b/pkg/utils/config.go index 929e1f9f..8fd0ec0e 100644 --- a/pkg/utils/config.go +++ b/pkg/utils/config.go @@ -15,7 +15,7 @@ func SaveConfig(filePath, fileName string, obj any) { } absolutePath := filepath.Join(filePath, fileName) - err = os.WriteFile(absolutePath, file, 0644) + err = os.WriteFile(absolutePath, file, 0600) if err != nil { log.Error().Err(err).Str("filepath", absolutePath).Msg("failed to save configuration file") } diff --git a/tests/integration/stores_test.go b/tests/integration/stores_test.go index a4ad4f90..54d0844c 100644 --- a/tests/integration/stores_test.go +++ b/tests/integration/stores_test.go @@ -36,7 +36,7 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs" tmpdir, err = os.MkdirTemp("", "") Expect(err).ToNot(HaveOccurred()) backendAssetsDir := filepath.Join(tmpdir, "backend-assets") - err = os.Mkdir(backendAssetsDir, 0755) + err = os.Mkdir(backendAssetsDir, 0750) Expect(err).ToNot(HaveOccurred()) err = assets.ExtractFiles(backendAssets, backendAssetsDir) From eed285f9de7a0cd7d8d553b3a5792b99fb0893b5 Mon Sep 17 00:00:00 2001 From: Dave Date: Thu, 25 Apr 2024 18:47:35 -0400 Subject: [PATCH 0381/2895] fix: update langchainjs (#2136) quick update of the langchainjs example to quiet down some dependency security scanner noise Signed-off-by: Dave Lee --- .../package-lock.json | 1610 ++++++++++++++--- .../langchainjs-localai-example/package.json | 10 +- .../langchainjs-localai-example/src/index.mts | 39 +- .../langchainjs-localai-example/tsconfig.json | 3 +- 4 files changed, 1407 insertions(+), 255 deletions(-) diff --git a/examples/langchain/langchainjs-localai-example/package-lock.json b/examples/langchain/langchainjs-localai-example/package-lock.json index e0a45539..48fee285 100644 --- a/examples/langchain/langchainjs-localai-example/package-lock.json +++ b/examples/langchain/langchainjs-localai-example/package-lock.json @@ -9,8 +9,9 @@ "version": "0.1.0", "license": "MIT", "dependencies": { - "langchain": "^0.0.67", - "typeorm": "^0.3.15" + "@langchain/community": "^0.0.52", + "@langchain/openai": "^0.0.28", + "langchain": "^0.1.36" }, "devDependencies": { "@types/node": "^18.16.4", @@ -18,47 +19,519 @@ } }, "node_modules/@anthropic-ai/sdk": { - "version": "0.4.3", - "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.4.3.tgz", - "integrity": "sha512-SZrlXvjUUYT9rPmSzlTtmVk1OjVNpkCzILRluhiYwNcxXfQyvPJDi0CI6PyymygcgtqEF5EVqhKmC/PtPsNEIw==", + "version": "0.9.1", + "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.9.1.tgz", + "integrity": "sha512-wa1meQ2WSfoY8Uor3EdrJq0jTiZJoKoSii2ZVWRY1oN4Tlr5s59pADg9T79FTbPe1/se5c3pBeZgJL63wmuoBA==", "dependencies": { - "@fortaine/fetch-event-source": "^3.0.6", - "cross-fetch": "^3.1.5" + "@types/node": "^18.11.18", + "@types/node-fetch": "^2.6.4", + "abort-controller": "^3.0.0", + "agentkeepalive": "^4.2.1", + "digest-fetch": "^1.3.0", + "form-data-encoder": "1.7.2", + "formdata-node": "^4.3.2", + "node-fetch": "^2.6.7", + "web-streams-polyfill": "^3.2.1" } }, - "node_modules/@dqbd/tiktoken": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/@dqbd/tiktoken/-/tiktoken-1.0.7.tgz", - "integrity": 
"sha512-bhR5k5W+8GLzysjk8zTMVygQZsgvf7W1F0IlL4ZQ5ugjo5rCyiwGM5d8DYriXspytfu98tv59niang3/T+FoDw==" - }, - "node_modules/@fortaine/fetch-event-source": { - "version": "3.0.6", - "resolved": "https://registry.npmjs.org/@fortaine/fetch-event-source/-/fetch-event-source-3.0.6.tgz", - "integrity": "sha512-621GAuLMvKtyZQ3IA6nlDWhV1V/7PGOTNIGLUifxt0KzM+dZIweJ6F3XvQF3QnqeNfS1N7WQ0Kil1Di/lhChEw==", + "node_modules/@langchain/community": { + "version": "0.0.52", + "resolved": "https://registry.npmjs.org/@langchain/community/-/community-0.0.52.tgz", + "integrity": "sha512-L+IMAAaLNP7++4HhdvuVJegc8bdw8WP77Jvp98YcySFZTZWH1yasSQSlFn3jgBk+3xLBsudpTZuttKTrZ/TtVQ==", + "dependencies": { + "@langchain/core": "~0.1.60", + "@langchain/openai": "~0.0.28", + "expr-eval": "^2.0.2", + "flat": "^5.0.2", + "langsmith": "~0.1.1", + "uuid": "^9.0.0", + "zod": "^3.22.3", + "zod-to-json-schema": "^3.22.5" + }, "engines": { - "node": ">=16.15" + "node": ">=18" + }, + "peerDependencies": { + "@aws-crypto/sha256-js": "^5.0.0", + "@aws-sdk/client-bedrock-agent-runtime": "^3.485.0", + "@aws-sdk/client-bedrock-runtime": "^3.422.0", + "@aws-sdk/client-dynamodb": "^3.310.0", + "@aws-sdk/client-kendra": "^3.352.0", + "@aws-sdk/client-lambda": "^3.310.0", + "@aws-sdk/client-sagemaker-runtime": "^3.310.0", + "@aws-sdk/client-sfn": "^3.310.0", + "@aws-sdk/credential-provider-node": "^3.388.0", + "@azure/search-documents": "^12.0.0", + "@clickhouse/client": "^0.2.5", + "@cloudflare/ai": "*", + "@datastax/astra-db-ts": "^1.0.0", + "@elastic/elasticsearch": "^8.4.0", + "@getmetal/metal-sdk": "*", + "@getzep/zep-js": "^0.9.0", + "@gomomento/sdk": "^1.51.1", + "@gomomento/sdk-core": "^1.51.1", + "@google-ai/generativelanguage": "^0.2.1", + "@gradientai/nodejs-sdk": "^1.2.0", + "@huggingface/inference": "^2.6.4", + "@mozilla/readability": "*", + "@neondatabase/serverless": "*", + "@opensearch-project/opensearch": "*", + "@pinecone-database/pinecone": "*", + "@planetscale/database": "^1.8.0", + "@premai/prem-sdk": "^0.3.25", + "@qdrant/js-client-rest": "^1.2.0", + "@raycast/api": "^1.55.2", + "@rockset/client": "^0.9.1", + "@smithy/eventstream-codec": "^2.0.5", + "@smithy/protocol-http": "^3.0.6", + "@smithy/signature-v4": "^2.0.10", + "@smithy/util-utf8": "^2.0.0", + "@supabase/postgrest-js": "^1.1.1", + "@supabase/supabase-js": "^2.10.0", + "@tensorflow-models/universal-sentence-encoder": "*", + "@tensorflow/tfjs-converter": "*", + "@tensorflow/tfjs-core": "*", + "@upstash/redis": "^1.20.6", + "@upstash/vector": "^1.0.7", + "@vercel/kv": "^0.2.3", + "@vercel/postgres": "^0.5.0", + "@writerai/writer-sdk": "^0.40.2", + "@xata.io/client": "^0.28.0", + "@xenova/transformers": "^2.5.4", + "@zilliz/milvus2-sdk-node": ">=2.2.7", + "better-sqlite3": "^9.4.0", + "cassandra-driver": "^4.7.2", + "cborg": "^4.1.1", + "chromadb": "*", + "closevector-common": "0.1.3", + "closevector-node": "0.1.6", + "closevector-web": "0.1.6", + "cohere-ai": "*", + "convex": "^1.3.1", + "couchbase": "^4.3.0", + "discord.js": "^14.14.1", + "dria": "^0.0.3", + "duck-duck-scrape": "^2.2.5", + "faiss-node": "^0.5.1", + "firebase-admin": "^11.9.0 || ^12.0.0", + "google-auth-library": "^8.9.0", + "googleapis": "^126.0.1", + "hnswlib-node": "^3.0.0", + "html-to-text": "^9.0.5", + "interface-datastore": "^8.2.11", + "ioredis": "^5.3.2", + "it-all": "^3.0.4", + "jsdom": "*", + "jsonwebtoken": "^9.0.2", + "llmonitor": "^0.5.9", + "lodash": "^4.17.21", + "lunary": "^0.6.11", + "mongodb": ">=5.2.0", + "mysql2": "^3.3.3", + "neo4j-driver": "*", + "node-llama-cpp": "*", + "pg": 
"^8.11.0", + "pg-copy-streams": "^6.0.5", + "pickleparser": "^0.2.1", + "portkey-ai": "^0.1.11", + "redis": "*", + "replicate": "^0.18.0", + "typeorm": "^0.3.12", + "typesense": "^1.5.3", + "usearch": "^1.1.1", + "vectordb": "^0.1.4", + "voy-search": "0.6.2", + "weaviate-ts-client": "*", + "web-auth-library": "^1.0.3", + "ws": "^8.14.2" + }, + "peerDependenciesMeta": { + "@aws-crypto/sha256-js": { + "optional": true + }, + "@aws-sdk/client-bedrock-agent-runtime": { + "optional": true + }, + "@aws-sdk/client-bedrock-runtime": { + "optional": true + }, + "@aws-sdk/client-dynamodb": { + "optional": true + }, + "@aws-sdk/client-kendra": { + "optional": true + }, + "@aws-sdk/client-lambda": { + "optional": true + }, + "@aws-sdk/client-sagemaker-runtime": { + "optional": true + }, + "@aws-sdk/client-sfn": { + "optional": true + }, + "@aws-sdk/credential-provider-node": { + "optional": true + }, + "@azure/search-documents": { + "optional": true + }, + "@clickhouse/client": { + "optional": true + }, + "@cloudflare/ai": { + "optional": true + }, + "@datastax/astra-db-ts": { + "optional": true + }, + "@elastic/elasticsearch": { + "optional": true + }, + "@getmetal/metal-sdk": { + "optional": true + }, + "@getzep/zep-js": { + "optional": true + }, + "@gomomento/sdk": { + "optional": true + }, + "@gomomento/sdk-core": { + "optional": true + }, + "@google-ai/generativelanguage": { + "optional": true + }, + "@gradientai/nodejs-sdk": { + "optional": true + }, + "@huggingface/inference": { + "optional": true + }, + "@mozilla/readability": { + "optional": true + }, + "@neondatabase/serverless": { + "optional": true + }, + "@opensearch-project/opensearch": { + "optional": true + }, + "@pinecone-database/pinecone": { + "optional": true + }, + "@planetscale/database": { + "optional": true + }, + "@premai/prem-sdk": { + "optional": true + }, + "@qdrant/js-client-rest": { + "optional": true + }, + "@raycast/api": { + "optional": true + }, + "@rockset/client": { + "optional": true + }, + "@smithy/eventstream-codec": { + "optional": true + }, + "@smithy/protocol-http": { + "optional": true + }, + "@smithy/signature-v4": { + "optional": true + }, + "@smithy/util-utf8": { + "optional": true + }, + "@supabase/postgrest-js": { + "optional": true + }, + "@supabase/supabase-js": { + "optional": true + }, + "@tensorflow-models/universal-sentence-encoder": { + "optional": true + }, + "@tensorflow/tfjs-converter": { + "optional": true + }, + "@tensorflow/tfjs-core": { + "optional": true + }, + "@upstash/redis": { + "optional": true + }, + "@upstash/vector": { + "optional": true + }, + "@vercel/kv": { + "optional": true + }, + "@vercel/postgres": { + "optional": true + }, + "@writerai/writer-sdk": { + "optional": true + }, + "@xata.io/client": { + "optional": true + }, + "@xenova/transformers": { + "optional": true + }, + "@zilliz/milvus2-sdk-node": { + "optional": true + }, + "better-sqlite3": { + "optional": true + }, + "cassandra-driver": { + "optional": true + }, + "cborg": { + "optional": true + }, + "chromadb": { + "optional": true + }, + "closevector-common": { + "optional": true + }, + "closevector-node": { + "optional": true + }, + "closevector-web": { + "optional": true + }, + "cohere-ai": { + "optional": true + }, + "convex": { + "optional": true + }, + "couchbase": { + "optional": true + }, + "discord.js": { + "optional": true + }, + "dria": { + "optional": true + }, + "duck-duck-scrape": { + "optional": true + }, + "faiss-node": { + "optional": true + }, + "firebase-admin": { + "optional": true + }, + 
"google-auth-library": { + "optional": true + }, + "googleapis": { + "optional": true + }, + "hnswlib-node": { + "optional": true + }, + "html-to-text": { + "optional": true + }, + "interface-datastore": { + "optional": true + }, + "ioredis": { + "optional": true + }, + "it-all": { + "optional": true + }, + "jsdom": { + "optional": true + }, + "jsonwebtoken": { + "optional": true + }, + "llmonitor": { + "optional": true + }, + "lodash": { + "optional": true + }, + "lunary": { + "optional": true + }, + "mongodb": { + "optional": true + }, + "mysql2": { + "optional": true + }, + "neo4j-driver": { + "optional": true + }, + "node-llama-cpp": { + "optional": true + }, + "pg": { + "optional": true + }, + "pg-copy-streams": { + "optional": true + }, + "pickleparser": { + "optional": true + }, + "portkey-ai": { + "optional": true + }, + "redis": { + "optional": true + }, + "replicate": { + "optional": true + }, + "typeorm": { + "optional": true + }, + "typesense": { + "optional": true + }, + "usearch": { + "optional": true + }, + "vectordb": { + "optional": true + }, + "voy-search": { + "optional": true + }, + "weaviate-ts-client": { + "optional": true + }, + "web-auth-library": { + "optional": true + }, + "ws": { + "optional": true + } + } + }, + "node_modules/@langchain/core": { + "version": "0.1.60", + "resolved": "https://registry.npmjs.org/@langchain/core/-/core-0.1.60.tgz", + "integrity": "sha512-3EJW4ir0tFe17AakpXCgO9flSoDjFELpSQs2w/CMZ5FBlHYxo3ODgVQAZvlHy97khEVgcnvlL3EDhPE7IdNibA==", + "dependencies": { + "ansi-styles": "^5.0.0", + "camelcase": "6", + "decamelize": "1.2.0", + "js-tiktoken": "^1.0.8", + "langsmith": "~0.1.7", + "ml-distance": "^4.0.0", + "mustache": "^4.2.0", + "p-queue": "^6.6.2", + "p-retry": "4", + "uuid": "^9.0.0", + "zod": "^3.22.4", + "zod-to-json-schema": "^3.22.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@langchain/openai": { + "version": "0.0.28", + "resolved": "https://registry.npmjs.org/@langchain/openai/-/openai-0.0.28.tgz", + "integrity": "sha512-2s1RA3/eAnz4ahdzsMPBna9hfAqpFNlWdHiPxVGZ5yrhXsbLWWoPcF+22LCk9t0HJKtazi2GCIWc0HVXH9Abig==", + "dependencies": { + "@langchain/core": "~0.1.56", + "js-tiktoken": "^1.0.7", + "openai": "^4.32.1", + "zod": "^3.22.4", + "zod-to-json-schema": "^3.22.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@langchain/textsplitters": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/@langchain/textsplitters/-/textsplitters-0.0.0.tgz", + "integrity": "sha512-3hPesWomnmVeYMppEGYbyv0v/sRUugUdlFBNn9m1ueJYHAIKbvCErkWxNUH3guyKKYgJVrkvZoQxcd9faucSaw==", + "dependencies": { + "@langchain/core": "~0.1", + "js-tiktoken": "^1.0.11" + }, + "engines": { + "node": ">=18" } }, "node_modules/@sqltools/formatter": { "version": "1.2.5", "resolved": "https://registry.npmjs.org/@sqltools/formatter/-/formatter-1.2.5.tgz", - "integrity": "sha512-Uy0+khmZqUrUGm5dmMqVlnvufZRSK0FbYzVgp0UMstm+F5+W2/jnEEQyc9vo1ZR/E5ZI/B1WjjoTqBqwJL6Krw==" + "integrity": "sha512-Uy0+khmZqUrUGm5dmMqVlnvufZRSK0FbYzVgp0UMstm+F5+W2/jnEEQyc9vo1ZR/E5ZI/B1WjjoTqBqwJL6Krw==", + "optional": true, + "peer": true }, "node_modules/@types/node": { "version": "18.16.4", "resolved": "https://registry.npmjs.org/@types/node/-/node-18.16.4.tgz", - "integrity": "sha512-LUhvPmAKAbgm+p/K11IWszLZVoZDlMF4NRmqbhEzDz/CnCuehPkZXwZbBCKGJsgjnuVejotBwM7B3Scrq4EqDw==", - "dev": true + "integrity": "sha512-LUhvPmAKAbgm+p/K11IWszLZVoZDlMF4NRmqbhEzDz/CnCuehPkZXwZbBCKGJsgjnuVejotBwM7B3Scrq4EqDw==" + }, + "node_modules/@types/node-fetch": { + "version": 
"2.6.11", + "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.11.tgz", + "integrity": "sha512-24xFj9R5+rfQJLRyM56qh+wnVSYhyXC2tkoBndtY0U+vubqNsYXGjufB2nn8Q6gt0LrARwL6UBtMCSVCwl4B1g==", + "dependencies": { + "@types/node": "*", + "form-data": "^4.0.0" + } }, "node_modules/@types/retry": { "version": "0.12.0", "resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz", "integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==" }, + "node_modules/@types/uuid": { + "version": "9.0.8", + "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz", + "integrity": "sha512-jg+97EGIcY9AGHJJRaaPVgetKDsrTgbRjQ5Msgjh/DQKEFl0DtyRr/VCOyD1T2R1MNeWPK/u7JoGhlDZnKBAfA==" + }, + "node_modules/abort-controller": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", + "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", + "dependencies": { + "event-target-shim": "^5.0.0" + }, + "engines": { + "node": ">=6.5" + } + }, + "node_modules/agentkeepalive": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.5.0.tgz", + "integrity": "sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==", + "dependencies": { + "humanize-ms": "^1.2.1" + }, + "engines": { + "node": ">= 8.0.0" + } + }, "node_modules/ansi-regex": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "optional": true, + "peer": true, "engines": { "node": ">=8" } @@ -77,33 +550,41 @@ "node_modules/any-promise": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/any-promise/-/any-promise-1.3.0.tgz", - "integrity": "sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==" + "integrity": "sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==", + "optional": true, + "peer": true }, "node_modules/app-root-path": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/app-root-path/-/app-root-path-3.1.0.tgz", "integrity": "sha512-biN3PwB2gUtjaYy/isrU3aNWI5w+fAfvHkSvCKeQGxhmYpwKFUxudR3Yya+KqVRHBmEDYh+/lTozYCFbmzX4nA==", + "optional": true, + "peer": true, "engines": { "node": ">= 6.0.0" } }, + "node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==" + }, "node_modules/asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" }, - "node_modules/axios": { - "version": "0.26.1", - "resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz", - "integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==", - "dependencies": { - "follow-redirects": "^1.14.8" - } - }, "node_modules/balanced-match": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" + "integrity": 
"sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "optional": true, + "peer": true + }, + "node_modules/base-64": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/base-64/-/base-64-0.1.0.tgz", + "integrity": "sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA==" }, "node_modules/base64-js": { "version": "1.5.1", @@ -141,15 +622,12 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", + "optional": true, + "peer": true, "dependencies": { "balanced-match": "^1.0.0" } }, - "node_modules/browser-or-node": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/browser-or-node/-/browser-or-node-2.1.1.tgz", - "integrity": "sha512-8CVjaLJGuSKMVTxJ2DpBl5XnlNDiT4cQFeuCJJrvJmts9YrTZDizTX7PjC2s6W4x+MBGZeEY6dGMrF04/6Hgqg==" - }, "node_modules/buffer": { "version": "6.0.3", "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", @@ -168,15 +646,30 @@ "url": "https://feross.org/support" } ], + "optional": true, + "peer": true, "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.2.1" } }, + "node_modules/camelcase": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz", + "integrity": "sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/chalk": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "optional": true, + "peer": true, "dependencies": { "ansi-styles": "^4.1.0", "supports-color": "^7.1.0" @@ -192,6 +685,8 @@ "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "optional": true, + "peer": true, "dependencies": { "color-convert": "^2.0.1" }, @@ -202,10 +697,20 @@ "url": "https://github.com/chalk/ansi-styles?sponsor=1" } }, + "node_modules/charenc": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/charenc/-/charenc-0.0.2.tgz", + "integrity": "sha512-yrLQ/yVUFXkzg7EDQsPieE/53+0RlaWTs+wBrvW36cyilJ2SaDWfl4Yj7MtLTXleV9uEKefbAGUPv2/iWSooRA==", + "engines": { + "node": "*" + } + }, "node_modules/cli-highlight": { "version": "2.1.11", "resolved": "https://registry.npmjs.org/cli-highlight/-/cli-highlight-2.1.11.tgz", "integrity": "sha512-9KDcoEVwyUXrjcJNvHD0NFc/hiwe/WPVYIleQh2O1N2Zro5gWJZ/K+3DGn8w8P/F6FxOgzyC5bxDyHIgCSPhGg==", + "optional": true, + "peer": true, "dependencies": { "chalk": "^4.0.0", "highlight.js": "^10.7.1", @@ -226,6 +731,8 @@ "version": "7.0.4", "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", + "optional": true, + "peer": true, "dependencies": { "string-width": "^4.2.0", "strip-ansi": "^6.0.0", @@ -236,6 +743,8 @@ "version": "16.2.0", "resolved": "https://registry.npmjs.org/yargs/-/yargs-16.2.0.tgz", "integrity": "sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw==", + "optional": 
true, + "peer": true, "dependencies": { "cliui": "^7.0.2", "escalade": "^3.1.1", @@ -253,6 +762,8 @@ "version": "20.2.9", "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.9.tgz", "integrity": "sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==", + "optional": true, + "peer": true, "engines": { "node": ">=10" } @@ -261,6 +772,8 @@ "version": "8.0.1", "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", + "optional": true, + "peer": true, "dependencies": { "string-width": "^4.2.0", "strip-ansi": "^6.0.1", @@ -274,6 +787,8 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "optional": true, + "peer": true, "dependencies": { "color-name": "~1.1.4" }, @@ -284,7 +799,9 @@ "node_modules/color-name": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "optional": true, + "peer": true }, "node_modules/combined-stream": { "version": "1.0.8", @@ -297,18 +814,28 @@ "node": ">= 0.8" } }, - "node_modules/cross-fetch": { - "version": "3.1.5", - "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.1.5.tgz", - "integrity": "sha512-lvb1SBsI0Z7GDwmuid+mU3kWVBwTVUbe7S0H52yaaAdQOXq2YktTCZdlAcNKFzE6QtRz0snpw9bNiPeOIkkQvw==", - "dependencies": { - "node-fetch": "2.6.7" + "node_modules/commander": { + "version": "10.0.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-10.0.1.tgz", + "integrity": "sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug==", + "engines": { + "node": ">=14" + } + }, + "node_modules/crypt": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/crypt/-/crypt-0.0.2.tgz", + "integrity": "sha512-mCxBlsHFYh9C+HVpiEacem8FEBnMXgU9gy4zmNC+SXAZNB/1idgp/aulFJ4FgCi7GPEVbfyng092GqL2k2rmow==", + "engines": { + "node": "*" } }, "node_modules/debug": { "version": "4.3.4", "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "optional": true, + "peer": true, "dependencies": { "ms": "2.1.2" }, @@ -321,6 +848,14 @@ } } }, + "node_modules/decamelize": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz", + "integrity": "sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/delayed-stream": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", @@ -329,10 +864,21 @@ "node": ">=0.4.0" } }, + "node_modules/digest-fetch": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/digest-fetch/-/digest-fetch-1.3.0.tgz", + "integrity": "sha512-CGJuv6iKNM7QyZlM2T3sPAdZWd/p9zQiRNS9G+9COUCwzWFTs0Xp8NF5iePx7wtvhDykReiRRrSeNb4oMmB8lA==", + "dependencies": { + "base-64": "^0.1.0", + "md5": "^2.3.0" + } + }, "node_modules/dotenv": { "version": "16.0.3", "resolved": 
"https://registry.npmjs.org/dotenv/-/dotenv-16.0.3.tgz", "integrity": "sha512-7GO6HghkA5fYG9TYnNxi14/7K9f5occMlp3zXAuSxn7CKCxt9xbNWG7yF8hTCSUchlfWSe3uLmlPfigevRItzQ==", + "optional": true, + "peer": true, "engines": { "node": ">=12" } @@ -340,12 +886,24 @@ "node_modules/emoji-regex": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "optional": true, + "peer": true }, "node_modules/escalade": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", + "optional": true, + "peer": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/event-target-shim": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", + "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==", "engines": { "node": ">=6" } @@ -368,25 +926,6 @@ "flat": "cli.js" } }, - "node_modules/follow-redirects": { - "version": "1.15.6", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", - "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==", - "funding": [ - { - "type": "individual", - "url": "https://github.com/sponsors/RubenVerborgh" - } - ], - "engines": { - "node": ">=4.0" - }, - "peerDependenciesMeta": { - "debug": { - "optional": true - } - } - }, "node_modules/form-data": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", @@ -400,15 +939,44 @@ "node": ">= 6" } }, + "node_modules/form-data-encoder": { + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz", + "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==" + }, + "node_modules/formdata-node": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz", + "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", + "dependencies": { + "node-domexception": "1.0.0", + "web-streams-polyfill": "4.0.0-beta.3" + }, + "engines": { + "node": ">= 12.20" + } + }, + "node_modules/formdata-node/node_modules/web-streams-polyfill": { + "version": "4.0.0-beta.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", + "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==", + "engines": { + "node": ">= 14" + } + }, "node_modules/fs.realpath": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", - "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==" + "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", + "optional": true, + "peer": true }, "node_modules/get-caller-file": { "version": "2.0.5", "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", "integrity": 
"sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "optional": true, + "peer": true, "engines": { "node": "6.* || 8.* || >= 10.*" } @@ -417,6 +985,8 @@ "version": "8.1.0", "resolved": "https://registry.npmjs.org/glob/-/glob-8.1.0.tgz", "integrity": "sha512-r8hpEjiQEYlF2QU0df3dS+nxxSIreXQS1qRhMJM0Q5NDdR386C7jb7Hwwod8Fgiuex+k0GFjgft18yvxm5XoCQ==", + "optional": true, + "peer": true, "dependencies": { "fs.realpath": "^1.0.0", "inflight": "^1.0.4", @@ -435,6 +1005,8 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "optional": true, + "peer": true, "engines": { "node": ">=8" } @@ -443,10 +1015,20 @@ "version": "10.7.3", "resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-10.7.3.tgz", "integrity": "sha512-tzcUFauisWKNHaRkN4Wjl/ZA07gENAjFl3J/c480dprkGTg5EQstgaNFqBfUqCq54kZRIEcreTsAgF/m2quD7A==", + "optional": true, + "peer": true, "engines": { "node": "*" } }, + "node_modules/humanize-ms": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", + "integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==", + "dependencies": { + "ms": "^2.0.0" + } + }, "node_modules/ieee754": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", @@ -464,12 +1046,16 @@ "type": "consulting", "url": "https://feross.org/support" } - ] + ], + "optional": true, + "peer": true }, "node_modules/inflight": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", + "optional": true, + "peer": true, "dependencies": { "once": "^1.3.0", "wrappy": "1" @@ -478,21 +1064,49 @@ "node_modules/inherits": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", - "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "optional": true, + "peer": true }, "node_modules/is-any-array": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-2.0.1.tgz", "integrity": "sha512-UtilS7hLRu++wb/WBAw9bNuP1Eg04Ivn1vERJck8zJthEvXCBEBpGR/33u/xLKWEQf95803oalHrVDptcAvFdQ==" }, + "node_modules/is-buffer": { + "version": "1.1.6", + "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz", + "integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==" + }, "node_modules/is-fullwidth-code-point": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "optional": true, + "peer": true, "engines": { "node": ">=8" } }, + "node_modules/js-tiktoken": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.11.tgz", + "integrity": "sha512-PajXFLq2vx7/8jllQZ43vzNpAai/0MOVdJjW/UrNyJorNQRTjHrqdGJG/mjHVy7h9M6dW6CaG43eNLMYFkTh6w==", + "dependencies": { + "base64-js": "^1.5.1" + } + }, + "node_modules/js-yaml": { + "version": "4.1.0", + 
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", + "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", + "dependencies": { + "argparse": "^2.0.1" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, "node_modules/jsonpointer": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/jsonpointer/-/jsonpointer-5.0.1.tgz", @@ -502,76 +1116,120 @@ } }, "node_modules/langchain": { - "version": "0.0.67", - "resolved": "https://registry.npmjs.org/langchain/-/langchain-0.0.67.tgz", - "integrity": "sha512-OO9NEoVYJyNTmrA76rgisA48LkA6Si7qVAS+1hakzKwf/Hj7GhvDe/NpVaWmOFtkAHusJHSbCplbeJKWIgFR2g==", + "version": "0.1.36", + "resolved": "https://registry.npmjs.org/langchain/-/langchain-0.1.36.tgz", + "integrity": "sha512-NTbnCL/jKWIeEI//Nm1oG8nhW3vkYWvEMr1MPotmTThTfeKfO87eV/OAzAyh6Ruy6GFs/qofRgQZGIe6XvXTNQ==", "dependencies": { - "@anthropic-ai/sdk": "^0.4.3", - "@dqbd/tiktoken": "^1.0.7", - "ansi-styles": "^5.0.0", + "@anthropic-ai/sdk": "^0.9.1", + "@langchain/community": "~0.0.47", + "@langchain/core": "~0.1.60", + "@langchain/openai": "~0.0.28", + "@langchain/textsplitters": "~0.0.0", "binary-extensions": "^2.2.0", - "browser-or-node": "^2.1.1", - "expr-eval": "^2.0.2", - "flat": "^5.0.2", + "js-tiktoken": "^1.0.7", + "js-yaml": "^4.1.0", "jsonpointer": "^5.0.1", + "langchainhub": "~0.0.8", + "langsmith": "~0.1.7", "ml-distance": "^4.0.0", - "object-hash": "^3.0.0", - "openai": "^3.2.0", - "p-queue": "^6.6.2", + "openapi-types": "^12.1.3", "p-retry": "4", "uuid": "^9.0.0", "yaml": "^2.2.1", - "zod": "^3.21.4", - "zod-to-json-schema": "^3.20.4" + "zod": "^3.22.4", + "zod-to-json-schema": "^3.22.3" }, "engines": { "node": ">=18" }, "peerDependencies": { - "@aws-sdk/client-lambda": "^3.310.0", "@aws-sdk/client-s3": "^3.310.0", - "@getmetal/metal-sdk": "*", - "@huggingface/inference": "^1.5.1", - "@opensearch-project/opensearch": "*", + "@aws-sdk/client-sagemaker-runtime": "^3.310.0", + "@aws-sdk/client-sfn": "^3.310.0", + "@aws-sdk/credential-provider-node": "^3.388.0", + "@azure/storage-blob": "^12.15.0", + "@gomomento/sdk": "^1.51.1", + "@gomomento/sdk-core": "^1.51.1", + "@gomomento/sdk-web": "^1.51.1", + "@google-ai/generativelanguage": "^0.2.1", + "@google-cloud/storage": "^6.10.1 || ^7.7.0", + "@mendable/firecrawl-js": "^0.0.13", + "@notionhq/client": "^2.2.10", "@pinecone-database/pinecone": "*", "@supabase/supabase-js": "^2.10.0", - "@tensorflow-models/universal-sentence-encoder": "*", - "@tensorflow/tfjs-converter": "*", - "@tensorflow/tfjs-core": "*", - "@zilliz/milvus2-sdk-node": "^2.2.0", + "@vercel/kv": "^0.2.3", + "@xata.io/client": "^0.28.0", + "apify-client": "^2.7.1", + "assemblyai": "^4.0.0", "axios": "*", "cheerio": "^1.0.0-rc.12", - "chromadb": "^1.4.0", - "cohere-ai": "^5.0.2", + "chromadb": "*", + "convex": "^1.3.1", + "couchbase": "^4.3.0", "d3-dsv": "^2.0.0", "epub2": "^3.0.1", - "hnswlib-node": "^1.4.2", + "fast-xml-parser": "*", + "google-auth-library": "^8.9.0", + "handlebars": "^4.7.8", "html-to-text": "^9.0.5", - "mammoth": "*", - "mongodb": "^5.2.0", + "ignore": "^5.2.0", + "ioredis": "^5.3.2", + "jsdom": "*", + "mammoth": "^1.6.0", + "mongodb": ">=5.2.0", + "node-llama-cpp": "*", + "notion-to-md": "^3.1.0", + "officeparser": "^4.0.4", "pdf-parse": "1.1.1", + "peggy": "^3.0.2", "playwright": "^1.32.1", "puppeteer": "^19.7.2", + "pyodide": "^0.24.1", "redis": "^4.6.4", - "replicate": "^0.9.0", - "srt-parser-2": "^1.2.2", + "sonix-speech-recognition": "^2.1.1", + "srt-parser-2": "^1.2.3", 
"typeorm": "^0.3.12", - "weaviate-ts-client": "^1.0.0" + "weaviate-ts-client": "*", + "web-auth-library": "^1.0.3", + "ws": "^8.14.2", + "youtube-transcript": "^1.0.6", + "youtubei.js": "^9.1.0" }, "peerDependenciesMeta": { - "@aws-sdk/client-lambda": { - "optional": true - }, "@aws-sdk/client-s3": { "optional": true }, - "@getmetal/metal-sdk": { + "@aws-sdk/client-sagemaker-runtime": { "optional": true }, - "@huggingface/inference": { + "@aws-sdk/client-sfn": { "optional": true }, - "@opensearch-project/opensearch": { + "@aws-sdk/credential-provider-node": { + "optional": true + }, + "@azure/storage-blob": { + "optional": true + }, + "@gomomento/sdk": { + "optional": true + }, + "@gomomento/sdk-core": { + "optional": true + }, + "@gomomento/sdk-web": { + "optional": true + }, + "@google-ai/generativelanguage": { + "optional": true + }, + "@google-cloud/storage": { + "optional": true + }, + "@mendable/firecrawl-js": { + "optional": true + }, + "@notionhq/client": { "optional": true }, "@pinecone-database/pinecone": { @@ -580,16 +1238,16 @@ "@supabase/supabase-js": { "optional": true }, - "@tensorflow-models/universal-sentence-encoder": { + "@vercel/kv": { "optional": true }, - "@tensorflow/tfjs-converter": { + "@xata.io/client": { "optional": true }, - "@tensorflow/tfjs-core": { + "apify-client": { "optional": true }, - "@zilliz/milvus2-sdk-node": { + "assemblyai": { "optional": true }, "axios": { @@ -601,7 +1259,10 @@ "chromadb": { "optional": true }, - "cohere-ai": { + "convex": { + "optional": true + }, + "couchbase": { "optional": true }, "d3-dsv": { @@ -610,31 +1271,64 @@ "epub2": { "optional": true }, - "hnswlib-node": { + "faiss-node": { + "optional": true + }, + "fast-xml-parser": { + "optional": true + }, + "google-auth-library": { + "optional": true + }, + "handlebars": { "optional": true }, "html-to-text": { "optional": true }, + "ignore": { + "optional": true + }, + "ioredis": { + "optional": true + }, + "jsdom": { + "optional": true + }, "mammoth": { "optional": true }, "mongodb": { "optional": true }, + "node-llama-cpp": { + "optional": true + }, + "notion-to-md": { + "optional": true + }, + "officeparser": { + "optional": true + }, "pdf-parse": { "optional": true }, + "peggy": { + "optional": true + }, "playwright": { "optional": true }, "puppeteer": { "optional": true }, + "pyodide": { + "optional": true + }, "redis": { "optional": true }, - "replicate": { + "sonix-speech-recognition": { "optional": true }, "srt-parser-2": { @@ -645,9 +1339,48 @@ }, "weaviate-ts-client": { "optional": true + }, + "web-auth-library": { + "optional": true + }, + "ws": { + "optional": true + }, + "youtube-transcript": { + "optional": true + }, + "youtubei.js": { + "optional": true } } }, + "node_modules/langchainhub": { + "version": "0.0.8", + "resolved": "https://registry.npmjs.org/langchainhub/-/langchainhub-0.0.8.tgz", + "integrity": "sha512-Woyb8YDHgqqTOZvWIbm2CaFDGfZ4NTSyXV687AG4vXEfoNo7cGQp7nhl7wL3ehenKWmNEmcxCLgOZzW8jE6lOQ==" + }, + "node_modules/langsmith": { + "version": "0.1.18", + "resolved": "https://registry.npmjs.org/langsmith/-/langsmith-0.1.18.tgz", + "integrity": "sha512-LHk0aIFAl3/iiKvUzAiM8Xdm13bRO70XERQeHCF99fL2X815Jc47nxu6m7usSuQC8sw6rirCKZbGm18cqdUEzA==", + "dependencies": { + "@types/uuid": "^9.0.1", + "commander": "^10.0.1", + "p-queue": "^6.6.2", + "p-retry": "4", + "uuid": "^9.0.0" + } + }, + "node_modules/md5": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/md5/-/md5-2.3.0.tgz", + "integrity": 
"sha512-T1GITYmFaKuO91vxyoQMFETst+O71VUPEU3ze5GNzDm0OWdP8v1ziTaAEPUr/3kLsY3Sftgz242A1SetQiDL7g==", + "dependencies": { + "charenc": "0.0.2", + "crypt": "0.0.2", + "is-buffer": "~1.1.6" + } + }, "node_modules/mime-db": { "version": "1.52.0", "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", @@ -671,6 +1404,8 @@ "version": "5.1.6", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz", "integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==", + "optional": true, + "peer": true, "dependencies": { "brace-expansion": "^2.0.1" }, @@ -682,6 +1417,8 @@ "version": "2.1.6", "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-2.1.6.tgz", "integrity": "sha512-+hEnITedc8LAtIP9u3HJDFIdcLV2vXP33sqLLIzkv1Db1zO/1OxbvYf0Y1OC/S/Qo5dxHXepofhmxL02PsKe+A==", + "optional": true, + "peer": true, "bin": { "mkdirp": "dist/cjs/src/bin.js" }, @@ -737,20 +1474,48 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" }, + "node_modules/mustache": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/mustache/-/mustache-4.2.0.tgz", + "integrity": "sha512-71ippSywq5Yb7/tVYyGbkBggbU8H3u5Rz56fH60jGFgr8uHwxs+aSKeqmluIVzM0m0kB7xQjKS6qPfd0b2ZoqQ==", + "bin": { + "mustache": "bin/mustache" + } + }, "node_modules/mz": { "version": "2.7.0", "resolved": "https://registry.npmjs.org/mz/-/mz-2.7.0.tgz", "integrity": "sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q==", + "optional": true, + "peer": true, "dependencies": { "any-promise": "^1.0.0", "object-assign": "^4.0.1", "thenify-all": "^1.0.0" } }, + "node_modules/node-domexception": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", + "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/jimmywarting" + }, + { + "type": "github", + "url": "https://paypal.me/jimmywarting" + } + ], + "engines": { + "node": ">=10.5.0" + } + }, "node_modules/node-fetch": { - "version": "2.6.7", - "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz", - "integrity": "sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ==", + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", "dependencies": { "whatwg-url": "^5.0.0" }, @@ -781,35 +1546,45 @@ "version": "4.1.1", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", + "optional": true, + "peer": true, "engines": { "node": ">=0.10.0" } }, - "node_modules/object-hash": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz", - "integrity": "sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==", - "engines": { - "node": ">= 6" - } - }, "node_modules/once": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", "integrity": 
"sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "optional": true, + "peer": true, "dependencies": { "wrappy": "1" } }, "node_modules/openai": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/openai/-/openai-3.2.1.tgz", - "integrity": "sha512-762C9BNlJPbjjlWZi4WYK9iM2tAVAv0uUp1UmI34vb0CN5T2mjB/qM6RYBmNKMh/dN9fC+bxqPwWJZUTWW052A==", + "version": "4.38.5", + "resolved": "https://registry.npmjs.org/openai/-/openai-4.38.5.tgz", + "integrity": "sha512-Ym5GJL98ZhLJJ7enBx53jjG3vwN/fsB+Ozh46nnRZZS9W1NiYqbwkJ+sXd3dkCIiWIgcyyOPL2Zr8SQAzbpj3g==", "dependencies": { - "axios": "^0.26.0", - "form-data": "^4.0.0" + "@types/node": "^18.11.18", + "@types/node-fetch": "^2.6.4", + "abort-controller": "^3.0.0", + "agentkeepalive": "^4.2.1", + "form-data-encoder": "1.7.2", + "formdata-node": "^4.3.2", + "node-fetch": "^2.6.7", + "web-streams-polyfill": "^3.2.1" + }, + "bin": { + "openai": "bin/cli" } }, + "node_modules/openapi-types": { + "version": "12.1.3", + "resolved": "https://registry.npmjs.org/openapi-types/-/openapi-types-12.1.3.tgz", + "integrity": "sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==" + }, "node_modules/p-finally": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/p-finally/-/p-finally-1.0.0.tgz", @@ -859,12 +1634,16 @@ "node_modules/parse5": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/parse5/-/parse5-5.1.1.tgz", - "integrity": "sha512-ugq4DFI0Ptb+WWjAdOK16+u/nHfiIrcE+sh8kZMaM0WllQKLI9rOUq6c2b7cwPkXdzfQESqvoqK6ug7U/Yyzug==" + "integrity": "sha512-ugq4DFI0Ptb+WWjAdOK16+u/nHfiIrcE+sh8kZMaM0WllQKLI9rOUq6c2b7cwPkXdzfQESqvoqK6ug7U/Yyzug==", + "optional": true, + "peer": true }, "node_modules/parse5-htmlparser2-tree-adapter": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-6.0.1.tgz", "integrity": "sha512-qPuWvbLgvDGilKc5BoicRovlT4MtYT6JfJyBOMDsKoiT+GiuP5qyrPCnR9HcPECIJJmZh5jRndyNThnhhb/vlA==", + "optional": true, + "peer": true, "dependencies": { "parse5": "^6.0.1" } @@ -872,17 +1651,23 @@ "node_modules/parse5-htmlparser2-tree-adapter/node_modules/parse5": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/parse5/-/parse5-6.0.1.tgz", - "integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==" + "integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==", + "optional": true, + "peer": true }, "node_modules/reflect-metadata": { "version": "0.1.13", "resolved": "https://registry.npmjs.org/reflect-metadata/-/reflect-metadata-0.1.13.tgz", - "integrity": "sha512-Ts1Y/anZELhSsjMcU605fU9RE4Oi3p5ORujwbIKXfWa+0Zxs510Qrmrce5/Jowq3cHSZSJqBjypxmHarc+vEWg==" + "integrity": "sha512-Ts1Y/anZELhSsjMcU605fU9RE4Oi3p5ORujwbIKXfWa+0Zxs510Qrmrce5/Jowq3cHSZSJqBjypxmHarc+vEWg==", + "optional": true, + "peer": true }, "node_modules/require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "optional": true, + "peer": true, "engines": { "node": ">=0.10.0" } @@ -912,12 +1697,16 @@ "type": "consulting", "url": "https://feross.org/support" } - ] + ], + "optional": true, + "peer": true }, "node_modules/sha.js": { "version": "2.4.11", "resolved": "https://registry.npmjs.org/sha.js/-/sha.js-2.4.11.tgz", 
"integrity": "sha512-QMEp5B7cftE7APOjk5Y6xgrbWu+WkLVQwk8JNjZ8nKRciZaByEW6MubieAiToS7+dwvrjGhH8jRXz3MVd0AYqQ==", + "optional": true, + "peer": true, "dependencies": { "inherits": "^2.0.1", "safe-buffer": "^5.0.1" @@ -930,6 +1719,8 @@ "version": "4.2.3", "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "optional": true, + "peer": true, "dependencies": { "emoji-regex": "^8.0.0", "is-fullwidth-code-point": "^3.0.0", @@ -943,6 +1734,8 @@ "version": "6.0.1", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "optional": true, + "peer": true, "dependencies": { "ansi-regex": "^5.0.1" }, @@ -954,6 +1747,8 @@ "version": "7.2.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "optional": true, + "peer": true, "dependencies": { "has-flag": "^4.0.0" }, @@ -965,6 +1760,8 @@ "version": "3.3.1", "resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz", "integrity": "sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw==", + "optional": true, + "peer": true, "dependencies": { "any-promise": "^1.0.0" } @@ -973,6 +1770,8 @@ "version": "1.6.0", "resolved": "https://registry.npmjs.org/thenify-all/-/thenify-all-1.6.0.tgz", "integrity": "sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA==", + "optional": true, + "peer": true, "dependencies": { "thenify": ">= 3.1.0 < 4" }, @@ -988,12 +1787,16 @@ "node_modules/tslib": { "version": "2.5.0", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.0.tgz", - "integrity": "sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg==" + "integrity": "sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg==", + "optional": true, + "peer": true }, "node_modules/typeorm": { "version": "0.3.15", "resolved": "https://registry.npmjs.org/typeorm/-/typeorm-0.3.15.tgz", "integrity": "sha512-R4JSw8QjDP1W+ypeRz/XrCXIqubrLSnNAzJAp9EQSQIPHTv+YmUHZis8g08lOwFpuhqL9m8jkPSz8GWEKlU/ow==", + "optional": true, + "peer": true, "dependencies": { "@sqltools/formatter": "^1.2.5", "app-root-path": "^3.1.0", @@ -1115,6 +1918,14 @@ "uuid": "dist/bin/uuid" } }, + "node_modules/web-streams-polyfill": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz", + "integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==", + "engines": { + "node": ">= 8" + } + }, "node_modules/webidl-conversions": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", @@ -1133,6 +1944,8 @@ "version": "7.0.0", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "optional": true, + "peer": true, "dependencies": { "ansi-styles": "^4.0.0", "string-width": "^4.1.0", @@ -1149,6 +1962,8 @@ "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", "integrity": 
"sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "optional": true, + "peer": true, "dependencies": { "color-convert": "^2.0.1" }, @@ -1162,12 +1977,16 @@ "node_modules/wrappy": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", - "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "optional": true, + "peer": true }, "node_modules/y18n": { "version": "5.0.8", "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "optional": true, + "peer": true, "engines": { "node": ">=10" } @@ -1184,6 +2003,8 @@ "version": "17.7.2", "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", + "optional": true, + "peer": true, "dependencies": { "cliui": "^8.0.1", "escalade": "^3.1.1", @@ -1201,67 +2022,154 @@ "version": "21.1.1", "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", + "optional": true, + "peer": true, "engines": { "node": ">=12" } }, "node_modules/zod": { - "version": "3.21.4", - "resolved": "https://registry.npmjs.org/zod/-/zod-3.21.4.tgz", - "integrity": "sha512-m46AKbrzKVzOzs/DZgVnG5H55N1sv1M8qZU3A8RIKbs3mrACDNeIOeilDymVb2HdmP8uwshOCF4uJ8uM9rCqJw==", + "version": "3.23.4", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.23.4.tgz", + "integrity": "sha512-/AtWOKbBgjzEYYQRNfoGKHObgfAZag6qUJX1VbHo2PRBgS+wfWagEY2mizjfyAPcGesrJOcx/wcl0L9WnVrHFw==", "funding": { "url": "https://github.com/sponsors/colinhacks" } }, "node_modules/zod-to-json-schema": { - "version": "3.21.0", - "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.21.0.tgz", - "integrity": "sha512-+KyFCzqKwE6CxMSZxEUBaGmdXzB09BoFebO+xef/ISE4cTfReQlyThYbS8aqd3uWkdt9fz5BGHsY0CbY+Ra9oA==", + "version": "3.23.0", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.23.0.tgz", + "integrity": "sha512-az0uJ243PxsRIa2x1WmNE/pnuA05gUq/JB8Lwe1EDCCL/Fz9MgjYQ0fPlyc2Tcv6aF2ZA7WM5TWaRZVEFaAIag==", "peerDependencies": { - "zod": "^3.21.4" + "zod": "^3.23.3" } } }, "dependencies": { "@anthropic-ai/sdk": { - "version": "0.4.3", - "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.4.3.tgz", - "integrity": "sha512-SZrlXvjUUYT9rPmSzlTtmVk1OjVNpkCzILRluhiYwNcxXfQyvPJDi0CI6PyymygcgtqEF5EVqhKmC/PtPsNEIw==", + "version": "0.9.1", + "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.9.1.tgz", + "integrity": "sha512-wa1meQ2WSfoY8Uor3EdrJq0jTiZJoKoSii2ZVWRY1oN4Tlr5s59pADg9T79FTbPe1/se5c3pBeZgJL63wmuoBA==", "requires": { - "@fortaine/fetch-event-source": "^3.0.6", - "cross-fetch": "^3.1.5" + "@types/node": "^18.11.18", + "@types/node-fetch": "^2.6.4", + "abort-controller": "^3.0.0", + "agentkeepalive": "^4.2.1", + "digest-fetch": "^1.3.0", + "form-data-encoder": "1.7.2", + "formdata-node": "^4.3.2", + "node-fetch": "^2.6.7", + "web-streams-polyfill": "^3.2.1" } }, - "@dqbd/tiktoken": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/@dqbd/tiktoken/-/tiktoken-1.0.7.tgz", - "integrity": 
"sha512-bhR5k5W+8GLzysjk8zTMVygQZsgvf7W1F0IlL4ZQ5ugjo5rCyiwGM5d8DYriXspytfu98tv59niang3/T+FoDw==" + "@langchain/community": { + "version": "0.0.52", + "resolved": "https://registry.npmjs.org/@langchain/community/-/community-0.0.52.tgz", + "integrity": "sha512-L+IMAAaLNP7++4HhdvuVJegc8bdw8WP77Jvp98YcySFZTZWH1yasSQSlFn3jgBk+3xLBsudpTZuttKTrZ/TtVQ==", + "requires": { + "@langchain/core": "0.1.5", + "@langchain/openai": "~0.0.28", + "expr-eval": "^2.0.2", + "flat": "^5.0.2", + "langsmith": "~0.1.1", + "uuid": "^9.0.0", + "zod": "^3.22.3", + "zod-to-json-schema": "^3.22.5" + } }, - "@fortaine/fetch-event-source": { - "version": "3.0.6", - "resolved": "https://registry.npmjs.org/@fortaine/fetch-event-source/-/fetch-event-source-3.0.6.tgz", - "integrity": "sha512-621GAuLMvKtyZQ3IA6nlDWhV1V/7PGOTNIGLUifxt0KzM+dZIweJ6F3XvQF3QnqeNfS1N7WQ0Kil1Di/lhChEw==" + "@langchain/core": { + "version": "0.1.60", + "resolved": "https://registry.npmjs.org/@langchain/core/-/core-0.1.60.tgz", + "integrity": "sha512-3EJW4ir0tFe17AakpXCgO9flSoDjFELpSQs2w/CMZ5FBlHYxo3ODgVQAZvlHy97khEVgcnvlL3EDhPE7IdNibA==", + "requires": { + "ansi-styles": "^5.0.0", + "camelcase": "6", + "decamelize": "1.2.0", + "js-tiktoken": "^1.0.8", + "langsmith": "~0.1.7", + "ml-distance": "^4.0.0", + "mustache": "^4.2.0", + "p-queue": "^6.6.2", + "p-retry": "4", + "uuid": "^9.0.0", + "zod": "^3.22.4", + "zod-to-json-schema": "^3.22.3" + } + }, + "@langchain/openai": { + "version": "0.0.28", + "resolved": "https://registry.npmjs.org/@langchain/openai/-/openai-0.0.28.tgz", + "integrity": "sha512-2s1RA3/eAnz4ahdzsMPBna9hfAqpFNlWdHiPxVGZ5yrhXsbLWWoPcF+22LCk9t0HJKtazi2GCIWc0HVXH9Abig==", + "requires": { + "@langchain/core": "0.1.5", + "js-tiktoken": "^1.0.7", + "openai": "^4.32.1", + "zod": "^3.22.4", + "zod-to-json-schema": "^3.22.3" + } + }, + "@langchain/textsplitters": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/@langchain/textsplitters/-/textsplitters-0.0.0.tgz", + "integrity": "sha512-3hPesWomnmVeYMppEGYbyv0v/sRUugUdlFBNn9m1ueJYHAIKbvCErkWxNUH3guyKKYgJVrkvZoQxcd9faucSaw==", + "requires": { + "@langchain/core": "~0.1", + "js-tiktoken": "^1.0.11" + } }, "@sqltools/formatter": { "version": "1.2.5", "resolved": "https://registry.npmjs.org/@sqltools/formatter/-/formatter-1.2.5.tgz", - "integrity": "sha512-Uy0+khmZqUrUGm5dmMqVlnvufZRSK0FbYzVgp0UMstm+F5+W2/jnEEQyc9vo1ZR/E5ZI/B1WjjoTqBqwJL6Krw==" + "integrity": "sha512-Uy0+khmZqUrUGm5dmMqVlnvufZRSK0FbYzVgp0UMstm+F5+W2/jnEEQyc9vo1ZR/E5ZI/B1WjjoTqBqwJL6Krw==", + "optional": true, + "peer": true }, "@types/node": { "version": "18.16.4", "resolved": "https://registry.npmjs.org/@types/node/-/node-18.16.4.tgz", - "integrity": "sha512-LUhvPmAKAbgm+p/K11IWszLZVoZDlMF4NRmqbhEzDz/CnCuehPkZXwZbBCKGJsgjnuVejotBwM7B3Scrq4EqDw==", - "dev": true + "integrity": "sha512-LUhvPmAKAbgm+p/K11IWszLZVoZDlMF4NRmqbhEzDz/CnCuehPkZXwZbBCKGJsgjnuVejotBwM7B3Scrq4EqDw==" + }, + "@types/node-fetch": { + "version": "2.6.11", + "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.11.tgz", + "integrity": "sha512-24xFj9R5+rfQJLRyM56qh+wnVSYhyXC2tkoBndtY0U+vubqNsYXGjufB2nn8Q6gt0LrARwL6UBtMCSVCwl4B1g==", + "requires": { + "@types/node": "*", + "form-data": "^4.0.0" + } }, "@types/retry": { "version": "0.12.0", "resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz", "integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==" }, + "@types/uuid": { + "version": "9.0.8", + "resolved": 
"https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz", + "integrity": "sha512-jg+97EGIcY9AGHJJRaaPVgetKDsrTgbRjQ5Msgjh/DQKEFl0DtyRr/VCOyD1T2R1MNeWPK/u7JoGhlDZnKBAfA==" + }, + "abort-controller": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", + "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", + "requires": { + "event-target-shim": "^5.0.0" + } + }, + "agentkeepalive": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.5.0.tgz", + "integrity": "sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==", + "requires": { + "humanize-ms": "^1.2.1" + } + }, "ansi-regex": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==" + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "optional": true, + "peer": true }, "ansi-styles": { "version": "5.2.0", @@ -1271,30 +2179,38 @@ "any-promise": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/any-promise/-/any-promise-1.3.0.tgz", - "integrity": "sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==" + "integrity": "sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==", + "optional": true, + "peer": true }, "app-root-path": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/app-root-path/-/app-root-path-3.1.0.tgz", - "integrity": "sha512-biN3PwB2gUtjaYy/isrU3aNWI5w+fAfvHkSvCKeQGxhmYpwKFUxudR3Yya+KqVRHBmEDYh+/lTozYCFbmzX4nA==" + "integrity": "sha512-biN3PwB2gUtjaYy/isrU3aNWI5w+fAfvHkSvCKeQGxhmYpwKFUxudR3Yya+KqVRHBmEDYh+/lTozYCFbmzX4nA==", + "optional": true, + "peer": true + }, + "argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==" }, "asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" }, - "axios": { - "version": "0.26.1", - "resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz", - "integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==", - "requires": { - "follow-redirects": "^1.14.8" - } - }, "balanced-match": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "optional": true, + "peer": true + }, + "base-64": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/base-64/-/base-64-0.1.0.tgz", + "integrity": "sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA==" }, "base64-js": { "version": "1.5.1", @@ -1315,28 +2231,34 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", "integrity": 
"sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", + "optional": true, + "peer": true, "requires": { "balanced-match": "^1.0.0" } }, - "browser-or-node": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/browser-or-node/-/browser-or-node-2.1.1.tgz", - "integrity": "sha512-8CVjaLJGuSKMVTxJ2DpBl5XnlNDiT4cQFeuCJJrvJmts9YrTZDizTX7PjC2s6W4x+MBGZeEY6dGMrF04/6Hgqg==" - }, "buffer": { "version": "6.0.3", "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", "integrity": "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==", + "optional": true, + "peer": true, "requires": { "base64-js": "^1.3.1", "ieee754": "^1.2.1" } }, + "camelcase": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz", + "integrity": "sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==" + }, "chalk": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "optional": true, + "peer": true, "requires": { "ansi-styles": "^4.1.0", "supports-color": "^7.1.0" @@ -1346,16 +2268,25 @@ "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "optional": true, + "peer": true, "requires": { "color-convert": "^2.0.1" } } } }, + "charenc": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/charenc/-/charenc-0.0.2.tgz", + "integrity": "sha512-yrLQ/yVUFXkzg7EDQsPieE/53+0RlaWTs+wBrvW36cyilJ2SaDWfl4Yj7MtLTXleV9uEKefbAGUPv2/iWSooRA==" + }, "cli-highlight": { "version": "2.1.11", "resolved": "https://registry.npmjs.org/cli-highlight/-/cli-highlight-2.1.11.tgz", "integrity": "sha512-9KDcoEVwyUXrjcJNvHD0NFc/hiwe/WPVYIleQh2O1N2Zro5gWJZ/K+3DGn8w8P/F6FxOgzyC5bxDyHIgCSPhGg==", + "optional": true, + "peer": true, "requires": { "chalk": "^4.0.0", "highlight.js": "^10.7.1", @@ -1369,6 +2300,8 @@ "version": "7.0.4", "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", + "optional": true, + "peer": true, "requires": { "string-width": "^4.2.0", "strip-ansi": "^6.0.0", @@ -1379,6 +2312,8 @@ "version": "16.2.0", "resolved": "https://registry.npmjs.org/yargs/-/yargs-16.2.0.tgz", "integrity": "sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw==", + "optional": true, + "peer": true, "requires": { "cliui": "^7.0.2", "escalade": "^3.1.1", @@ -1392,7 +2327,9 @@ "yargs-parser": { "version": "20.2.9", "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.9.tgz", - "integrity": "sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==" + "integrity": "sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==", + "optional": true, + "peer": true } } }, @@ -1400,6 +2337,8 @@ "version": "8.0.1", "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", + "optional": true, + "peer": true, "requires": { "string-width": "^4.2.0", "strip-ansi": "^6.0.1", @@ -1410,6 +2349,8 @@ 
"version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "optional": true, + "peer": true, "requires": { "color-name": "~1.1.4" } @@ -1417,7 +2358,9 @@ "color-name": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "optional": true, + "peer": true }, "combined-stream": { "version": "1.0.8", @@ -1427,41 +2370,70 @@ "delayed-stream": "~1.0.0" } }, - "cross-fetch": { - "version": "3.1.5", - "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.1.5.tgz", - "integrity": "sha512-lvb1SBsI0Z7GDwmuid+mU3kWVBwTVUbe7S0H52yaaAdQOXq2YktTCZdlAcNKFzE6QtRz0snpw9bNiPeOIkkQvw==", - "requires": { - "node-fetch": "2.6.7" - } + "commander": { + "version": "10.0.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-10.0.1.tgz", + "integrity": "sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug==" + }, + "crypt": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/crypt/-/crypt-0.0.2.tgz", + "integrity": "sha512-mCxBlsHFYh9C+HVpiEacem8FEBnMXgU9gy4zmNC+SXAZNB/1idgp/aulFJ4FgCi7GPEVbfyng092GqL2k2rmow==" }, "debug": { "version": "4.3.4", "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "optional": true, + "peer": true, "requires": { "ms": "2.1.2" } }, + "decamelize": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz", + "integrity": "sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==" + }, "delayed-stream": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==" }, + "digest-fetch": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/digest-fetch/-/digest-fetch-1.3.0.tgz", + "integrity": "sha512-CGJuv6iKNM7QyZlM2T3sPAdZWd/p9zQiRNS9G+9COUCwzWFTs0Xp8NF5iePx7wtvhDykReiRRrSeNb4oMmB8lA==", + "requires": { + "base-64": "^0.1.0", + "md5": "^2.3.0" + } + }, "dotenv": { "version": "16.0.3", "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.0.3.tgz", - "integrity": "sha512-7GO6HghkA5fYG9TYnNxi14/7K9f5occMlp3zXAuSxn7CKCxt9xbNWG7yF8hTCSUchlfWSe3uLmlPfigevRItzQ==" + "integrity": "sha512-7GO6HghkA5fYG9TYnNxi14/7K9f5occMlp3zXAuSxn7CKCxt9xbNWG7yF8hTCSUchlfWSe3uLmlPfigevRItzQ==", + "optional": true, + "peer": true }, "emoji-regex": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "optional": true, + "peer": true }, "escalade": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", - "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==" + "integrity": 
"sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", + "optional": true, + "peer": true + }, + "event-target-shim": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", + "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==" }, "eventemitter3": { "version": "4.0.7", @@ -1478,11 +2450,6 @@ "resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz", "integrity": "sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==" }, - "follow-redirects": { - "version": "1.15.6", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", - "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==" - }, "form-data": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", @@ -1493,20 +2460,47 @@ "mime-types": "^2.1.12" } }, + "form-data-encoder": { + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz", + "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==" + }, + "formdata-node": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz", + "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", + "requires": { + "node-domexception": "1.0.0", + "web-streams-polyfill": "4.0.0-beta.3" + }, + "dependencies": { + "web-streams-polyfill": { + "version": "4.0.0-beta.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", + "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==" + } + } + }, "fs.realpath": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", - "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==" + "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", + "optional": true, + "peer": true }, "get-caller-file": { "version": "2.0.5", "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", - "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==" + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "optional": true, + "peer": true }, "glob": { "version": "8.1.0", "resolved": "https://registry.npmjs.org/glob/-/glob-8.1.0.tgz", "integrity": "sha512-r8hpEjiQEYlF2QU0df3dS+nxxSIreXQS1qRhMJM0Q5NDdR386C7jb7Hwwod8Fgiuex+k0GFjgft18yvxm5XoCQ==", + "optional": true, + "peer": true, "requires": { "fs.realpath": "^1.0.0", "inflight": "^1.0.4", @@ -1518,22 +2512,38 @@ "has-flag": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", - "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==" + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "optional": true, + "peer": true }, "highlight.js": { "version": "10.7.3", "resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-10.7.3.tgz", - 
"integrity": "sha512-tzcUFauisWKNHaRkN4Wjl/ZA07gENAjFl3J/c480dprkGTg5EQstgaNFqBfUqCq54kZRIEcreTsAgF/m2quD7A==" + "integrity": "sha512-tzcUFauisWKNHaRkN4Wjl/ZA07gENAjFl3J/c480dprkGTg5EQstgaNFqBfUqCq54kZRIEcreTsAgF/m2quD7A==", + "optional": true, + "peer": true + }, + "humanize-ms": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", + "integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==", + "requires": { + "ms": "^2.0.0" + } }, "ieee754": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", - "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==" + "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", + "optional": true, + "peer": true }, "inflight": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", + "optional": true, + "peer": true, "requires": { "once": "^1.3.0", "wrappy": "1" @@ -1542,17 +2552,42 @@ "inherits": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", - "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "optional": true, + "peer": true }, "is-any-array": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-2.0.1.tgz", "integrity": "sha512-UtilS7hLRu++wb/WBAw9bNuP1Eg04Ivn1vERJck8zJthEvXCBEBpGR/33u/xLKWEQf95803oalHrVDptcAvFdQ==" }, + "is-buffer": { + "version": "1.1.6", + "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz", + "integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==" + }, "is-fullwidth-code-point": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==" + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "optional": true, + "peer": true + }, + "js-tiktoken": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.11.tgz", + "integrity": "sha512-PajXFLq2vx7/8jllQZ43vzNpAai/0MOVdJjW/UrNyJorNQRTjHrqdGJG/mjHVy7h9M6dW6CaG43eNLMYFkTh6w==", + "requires": { + "base64-js": "^1.5.1" + } + }, + "js-yaml": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", + "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", + "requires": { + "argparse": "^2.0.1" + } }, "jsonpointer": { "version": "5.0.1", @@ -1560,27 +2595,55 @@ "integrity": "sha512-p/nXbhSEcu3pZRdkW1OfJhpsVtW1gd4Wa1fnQc9YLiTfAjn0312eMKimbdIQzuZl9aa9xUGaRlP9T/CJE/ditQ==" }, "langchain": { - "version": "0.0.67", - "resolved": "https://registry.npmjs.org/langchain/-/langchain-0.0.67.tgz", - "integrity": "sha512-OO9NEoVYJyNTmrA76rgisA48LkA6Si7qVAS+1hakzKwf/Hj7GhvDe/NpVaWmOFtkAHusJHSbCplbeJKWIgFR2g==", + "version": "0.1.36", + "resolved": "https://registry.npmjs.org/langchain/-/langchain-0.1.36.tgz", + 
"integrity": "sha512-NTbnCL/jKWIeEI//Nm1oG8nhW3vkYWvEMr1MPotmTThTfeKfO87eV/OAzAyh6Ruy6GFs/qofRgQZGIe6XvXTNQ==", "requires": { - "@anthropic-ai/sdk": "^0.4.3", - "@dqbd/tiktoken": "^1.0.7", - "ansi-styles": "^5.0.0", + "@anthropic-ai/sdk": "^0.9.1", + "@langchain/community": "~0.0.47", + "@langchain/core": "0.1.5", + "@langchain/openai": "~0.0.28", + "@langchain/textsplitters": "~0.0.0", "binary-extensions": "^2.2.0", - "browser-or-node": "^2.1.1", - "expr-eval": "^2.0.2", - "flat": "^5.0.2", + "js-tiktoken": "^1.0.7", + "js-yaml": "^4.1.0", "jsonpointer": "^5.0.1", + "langchainhub": "~0.0.8", + "langsmith": "~0.1.7", "ml-distance": "^4.0.0", - "object-hash": "^3.0.0", - "openai": "^3.2.0", - "p-queue": "^6.6.2", + "openapi-types": "^12.1.3", "p-retry": "4", "uuid": "^9.0.0", "yaml": "^2.2.1", - "zod": "^3.21.4", - "zod-to-json-schema": "^3.20.4" + "zod": "^3.22.4", + "zod-to-json-schema": "^3.22.3" + } + }, + "langchainhub": { + "version": "0.0.8", + "resolved": "https://registry.npmjs.org/langchainhub/-/langchainhub-0.0.8.tgz", + "integrity": "sha512-Woyb8YDHgqqTOZvWIbm2CaFDGfZ4NTSyXV687AG4vXEfoNo7cGQp7nhl7wL3ehenKWmNEmcxCLgOZzW8jE6lOQ==" + }, + "langsmith": { + "version": "0.1.18", + "resolved": "https://registry.npmjs.org/langsmith/-/langsmith-0.1.18.tgz", + "integrity": "sha512-LHk0aIFAl3/iiKvUzAiM8Xdm13bRO70XERQeHCF99fL2X815Jc47nxu6m7usSuQC8sw6rirCKZbGm18cqdUEzA==", + "requires": { + "@types/uuid": "^9.0.1", + "commander": "^10.0.1", + "p-queue": "^6.6.2", + "p-retry": "4", + "uuid": "^9.0.0" + } + }, + "md5": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/md5/-/md5-2.3.0.tgz", + "integrity": "sha512-T1GITYmFaKuO91vxyoQMFETst+O71VUPEU3ze5GNzDm0OWdP8v1ziTaAEPUr/3kLsY3Sftgz242A1SetQiDL7g==", + "requires": { + "charenc": "0.0.2", + "crypt": "0.0.2", + "is-buffer": "~1.1.6" } }, "mime-db": { @@ -1600,6 +2663,8 @@ "version": "5.1.6", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz", "integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==", + "optional": true, + "peer": true, "requires": { "brace-expansion": "^2.0.1" } @@ -1607,7 +2672,9 @@ "mkdirp": { "version": "2.1.6", "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-2.1.6.tgz", - "integrity": "sha512-+hEnITedc8LAtIP9u3HJDFIdcLV2vXP33sqLLIzkv1Db1zO/1OxbvYf0Y1OC/S/Qo5dxHXepofhmxL02PsKe+A==" + "integrity": "sha512-+hEnITedc8LAtIP9u3HJDFIdcLV2vXP33sqLLIzkv1Db1zO/1OxbvYf0Y1OC/S/Qo5dxHXepofhmxL02PsKe+A==", + "optional": true, + "peer": true }, "ml-array-mean": { "version": "1.1.6", @@ -1654,20 +2721,32 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" }, + "mustache": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/mustache/-/mustache-4.2.0.tgz", + "integrity": "sha512-71ippSywq5Yb7/tVYyGbkBggbU8H3u5Rz56fH60jGFgr8uHwxs+aSKeqmluIVzM0m0kB7xQjKS6qPfd0b2ZoqQ==" + }, "mz": { "version": "2.7.0", "resolved": "https://registry.npmjs.org/mz/-/mz-2.7.0.tgz", "integrity": "sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q==", + "optional": true, + "peer": true, "requires": { "any-promise": "^1.0.0", "object-assign": "^4.0.1", "thenify-all": "^1.0.0" } }, + "node-domexception": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", + "integrity": 
"sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==" + }, "node-fetch": { - "version": "2.6.7", - "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz", - "integrity": "sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ==", + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", "requires": { "whatwg-url": "^5.0.0" } @@ -1680,30 +2759,40 @@ "object-assign": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==" - }, - "object-hash": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz", - "integrity": "sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==" + "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", + "optional": true, + "peer": true }, "once": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "optional": true, + "peer": true, "requires": { "wrappy": "1" } }, "openai": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/openai/-/openai-3.2.1.tgz", - "integrity": "sha512-762C9BNlJPbjjlWZi4WYK9iM2tAVAv0uUp1UmI34vb0CN5T2mjB/qM6RYBmNKMh/dN9fC+bxqPwWJZUTWW052A==", + "version": "4.38.5", + "resolved": "https://registry.npmjs.org/openai/-/openai-4.38.5.tgz", + "integrity": "sha512-Ym5GJL98ZhLJJ7enBx53jjG3vwN/fsB+Ozh46nnRZZS9W1NiYqbwkJ+sXd3dkCIiWIgcyyOPL2Zr8SQAzbpj3g==", "requires": { - "axios": "^0.26.0", - "form-data": "^4.0.0" + "@types/node": "^18.11.18", + "@types/node-fetch": "^2.6.4", + "abort-controller": "^3.0.0", + "agentkeepalive": "^4.2.1", + "form-data-encoder": "1.7.2", + "formdata-node": "^4.3.2", + "node-fetch": "^2.6.7", + "web-streams-polyfill": "^3.2.1" } }, + "openapi-types": { + "version": "12.1.3", + "resolved": "https://registry.npmjs.org/openapi-types/-/openapi-types-12.1.3.tgz", + "integrity": "sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==" + }, "p-finally": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/p-finally/-/p-finally-1.0.0.tgz", @@ -1738,12 +2827,16 @@ "parse5": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/parse5/-/parse5-5.1.1.tgz", - "integrity": "sha512-ugq4DFI0Ptb+WWjAdOK16+u/nHfiIrcE+sh8kZMaM0WllQKLI9rOUq6c2b7cwPkXdzfQESqvoqK6ug7U/Yyzug==" + "integrity": "sha512-ugq4DFI0Ptb+WWjAdOK16+u/nHfiIrcE+sh8kZMaM0WllQKLI9rOUq6c2b7cwPkXdzfQESqvoqK6ug7U/Yyzug==", + "optional": true, + "peer": true }, "parse5-htmlparser2-tree-adapter": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-6.0.1.tgz", "integrity": "sha512-qPuWvbLgvDGilKc5BoicRovlT4MtYT6JfJyBOMDsKoiT+GiuP5qyrPCnR9HcPECIJJmZh5jRndyNThnhhb/vlA==", + "optional": true, + "peer": true, "requires": { "parse5": "^6.0.1" }, @@ -1751,19 +2844,25 @@ "parse5": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/parse5/-/parse5-6.0.1.tgz", - "integrity": 
"sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==" + "integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==", + "optional": true, + "peer": true } } }, "reflect-metadata": { "version": "0.1.13", "resolved": "https://registry.npmjs.org/reflect-metadata/-/reflect-metadata-0.1.13.tgz", - "integrity": "sha512-Ts1Y/anZELhSsjMcU605fU9RE4Oi3p5ORujwbIKXfWa+0Zxs510Qrmrce5/Jowq3cHSZSJqBjypxmHarc+vEWg==" + "integrity": "sha512-Ts1Y/anZELhSsjMcU605fU9RE4Oi3p5ORujwbIKXfWa+0Zxs510Qrmrce5/Jowq3cHSZSJqBjypxmHarc+vEWg==", + "optional": true, + "peer": true }, "require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", - "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==" + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "optional": true, + "peer": true }, "retry": { "version": "0.13.1", @@ -1773,12 +2872,16 @@ "safe-buffer": { "version": "5.2.1", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", - "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==" + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "optional": true, + "peer": true }, "sha.js": { "version": "2.4.11", "resolved": "https://registry.npmjs.org/sha.js/-/sha.js-2.4.11.tgz", "integrity": "sha512-QMEp5B7cftE7APOjk5Y6xgrbWu+WkLVQwk8JNjZ8nKRciZaByEW6MubieAiToS7+dwvrjGhH8jRXz3MVd0AYqQ==", + "optional": true, + "peer": true, "requires": { "inherits": "^2.0.1", "safe-buffer": "^5.0.1" @@ -1788,6 +2891,8 @@ "version": "4.2.3", "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "optional": true, + "peer": true, "requires": { "emoji-regex": "^8.0.0", "is-fullwidth-code-point": "^3.0.0", @@ -1798,6 +2903,8 @@ "version": "6.0.1", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "optional": true, + "peer": true, "requires": { "ansi-regex": "^5.0.1" } @@ -1806,6 +2913,8 @@ "version": "7.2.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "optional": true, + "peer": true, "requires": { "has-flag": "^4.0.0" } @@ -1814,6 +2923,8 @@ "version": "3.3.1", "resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz", "integrity": "sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw==", + "optional": true, + "peer": true, "requires": { "any-promise": "^1.0.0" } @@ -1822,6 +2933,8 @@ "version": "1.6.0", "resolved": "https://registry.npmjs.org/thenify-all/-/thenify-all-1.6.0.tgz", "integrity": "sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA==", + "optional": true, + "peer": true, "requires": { "thenify": ">= 3.1.0 < 4" } @@ -1834,12 +2947,16 @@ "tslib": { "version": "2.5.0", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.0.tgz", - "integrity": 
"sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg==" + "integrity": "sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg==", + "optional": true, + "peer": true }, "typeorm": { "version": "0.3.15", "resolved": "https://registry.npmjs.org/typeorm/-/typeorm-0.3.15.tgz", "integrity": "sha512-R4JSw8QjDP1W+ypeRz/XrCXIqubrLSnNAzJAp9EQSQIPHTv+YmUHZis8g08lOwFpuhqL9m8jkPSz8GWEKlU/ow==", + "optional": true, + "peer": true, "requires": { "@sqltools/formatter": "^1.2.5", "app-root-path": "^3.1.0", @@ -1868,6 +2985,11 @@ "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.0.tgz", "integrity": "sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg==" }, + "web-streams-polyfill": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz", + "integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==" + }, "webidl-conversions": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", @@ -1886,6 +3008,8 @@ "version": "7.0.0", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "optional": true, + "peer": true, "requires": { "ansi-styles": "^4.0.0", "string-width": "^4.1.0", @@ -1896,6 +3020,8 @@ "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "optional": true, + "peer": true, "requires": { "color-convert": "^2.0.1" } @@ -1905,12 +3031,16 @@ "wrappy": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", - "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "optional": true, + "peer": true }, "y18n": { "version": "5.0.8", "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", - "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==" + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "optional": true, + "peer": true }, "yaml": { "version": "2.2.2", @@ -1921,6 +3051,8 @@ "version": "17.7.2", "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", + "optional": true, + "peer": true, "requires": { "cliui": "^8.0.1", "escalade": "^3.1.1", @@ -1934,17 +3066,19 @@ "yargs-parser": { "version": "21.1.1", "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", - "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==" + "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", + "optional": true, + "peer": true }, "zod": { - "version": "3.21.4", - "resolved": "https://registry.npmjs.org/zod/-/zod-3.21.4.tgz", - "integrity": "sha512-m46AKbrzKVzOzs/DZgVnG5H55N1sv1M8qZU3A8RIKbs3mrACDNeIOeilDymVb2HdmP8uwshOCF4uJ8uM9rCqJw==" + "version": "3.23.4", + 
"resolved": "https://registry.npmjs.org/zod/-/zod-3.23.4.tgz", + "integrity": "sha512-/AtWOKbBgjzEYYQRNfoGKHObgfAZag6qUJX1VbHo2PRBgS+wfWagEY2mizjfyAPcGesrJOcx/wcl0L9WnVrHFw==" }, "zod-to-json-schema": { - "version": "3.21.0", - "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.21.0.tgz", - "integrity": "sha512-+KyFCzqKwE6CxMSZxEUBaGmdXzB09BoFebO+xef/ISE4cTfReQlyThYbS8aqd3uWkdt9fz5BGHsY0CbY+Ra9oA==", + "version": "3.23.0", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.23.0.tgz", + "integrity": "sha512-az0uJ243PxsRIa2x1WmNE/pnuA05gUq/JB8Lwe1EDCCL/Fz9MgjYQ0fPlyc2Tcv6aF2ZA7WM5TWaRZVEFaAIag==", "requires": {} } } diff --git a/examples/langchain/langchainjs-localai-example/package.json b/examples/langchain/langchainjs-localai-example/package.json index a6023c03..1d65575d 100644 --- a/examples/langchain/langchainjs-localai-example/package.json +++ b/examples/langchain/langchainjs-localai-example/package.json @@ -1,6 +1,6 @@ { "name": "langchainjs-localai-example", - "version": "0.1.0", + "version": "0.1.1", "description": "Trivial Example of using langchain + the OpenAI API + LocalAI together", "main": "index.mjs", "scripts": { @@ -15,7 +15,11 @@ "typescript": "^5.0.4" }, "dependencies": { - "langchain": "^0.0.67", - "typeorm": "^0.3.15" + "@langchain/community": "^0.0.52", + "@langchain/openai": "^0.0.28", + "langchain": "^0.1.36" + }, + "overrides": { + "@langchain/core": "0.1.5" } } diff --git a/examples/langchain/langchainjs-localai-example/src/index.mts b/examples/langchain/langchainjs-localai-example/src/index.mts index 11faa384..995c2832 100644 --- a/examples/langchain/langchainjs-localai-example/src/index.mts +++ b/examples/langchain/langchainjs-localai-example/src/index.mts @@ -1,15 +1,17 @@ -import { OpenAIChat } from "langchain/llms/openai"; import { loadQAStuffChain } from "langchain/chains"; import { Document } from "langchain/document"; -import { initializeAgentExecutorWithOptions } from "langchain/agents"; -import {Calculator} from "langchain/tools/calculator"; +import { pull } from "langchain/hub"; +import { AgentExecutor, createOpenAIToolsAgent } from "langchain/agents"; +import {Calculator} from "@langchain/community/tools/calculator"; +import { ChatOpenAI } from "@langchain/openai"; +import type { ChatPromptTemplate } from "@langchain/core/prompts"; const pathToLocalAI = process.env['OPENAI_API_BASE'] || 'http://api:8080/v1'; const fakeApiKey = process.env['OPENAI_API_KEY'] || '-'; const modelName = process.env['MODEL_NAME'] || 'gpt-3.5-turbo'; -function getModel(): OpenAIChat { - return new OpenAIChat({ +function getModel(): ChatOpenAI { + return new ChatOpenAI({ prefixMessages: [ { role: "system", @@ -29,8 +31,8 @@ function getModel(): OpenAIChat { // Minimal example. export const run = async () => { const model = getModel(); - console.log(`about to model.call at ${new Date().toUTCString()}`); - const res = await model.call( + console.log(`about to model.invoke at ${new Date().toUTCString()}`); + const res = await model.invoke( "What would be a good company name a company that makes colorful socks?" ); console.log(`${new Date().toUTCString()}`); @@ -47,7 +49,7 @@ export const run2 = async () => { new Document({ pageContent: "Harrison went to Harvard." }), new Document({ pageContent: "Ankush went to Princeton." 
}),
   ];
-  const resA = await chainA.call({
+  const resA = await chainA.invoke({
     input_documents: docs,
     question: "Where did Harrison go to college?",
   });
@@ -58,22 +60,33 @@ await run2();
 
 // Quickly thrown together example of using tools + agents.
 // This seems like it should work, but it doesn't yet.
-export const temporarilyBrokenToolTest = async () => {
+export const toolAgentTest = async () => {
   const model = getModel();
-  const executor = await initializeAgentExecutorWithOptions([new Calculator(true)], model, {
-    agentType: "zero-shot-react-description",
+  const prompt = await pull<ChatPromptTemplate>("hwchase17/openai-tools-agent");
+
+  const tools = [new Calculator()];
+
+  const agent = await createOpenAIToolsAgent({
+    llm: model,
+    tools: tools,
+    prompt: prompt
   });
 
   console.log("Loaded agent.");
 
+  const agentExecutor = new AgentExecutor({
+    agent,
+    tools,
+  });
+
   const input = `What is the value of (500 *2) + 350 - 13?`;
 
   console.log(`Executing with input "${input}"...`);
 
-  const result = await executor.call({ input });
+  const result = await agentExecutor.invoke({ input });
 
   console.log(`Got output ${result.output}`);
 }
 
-await temporarilyBrokenToolTest();
+await toolAgentTest();
diff --git a/examples/langchain/langchainjs-localai-example/tsconfig.json b/examples/langchain/langchainjs-localai-example/tsconfig.json
index 84129d26..5e0d5a58 100644
--- a/examples/langchain/langchainjs-localai-example/tsconfig.json
+++ b/examples/langchain/langchainjs-localai-example/tsconfig.json
@@ -8,7 +8,8 @@
     "esModuleInterop": true,
     "allowSyntheticDefaultImports": true,
     "isolatedModules": true,
-    "outDir": "./dist"
+    "outDir": "./dist",
+    "skipLibCheck": true
   },
   "include": ["src", "test"],
   "exclude": ["node_modules", "dist"]

From 6b411ae2129e7520c0ea03d0685d3eeb788003cf Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 26 Apr 2024 00:48:06 +0200
Subject: [PATCH 0382/2895] models(gallery): add variants of llama3 70b (#2138)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 56e434c5..9cc72d79 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -82,6 +82,24 @@
 - filename: Meta-Llama-3-70B-Instruct.Q4_K_M.gguf
   sha256: d559de8dd806a76dbd29f8d8bd04666f2b29e7c7872d8e8481abd07805884d72
   uri: huggingface://MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct.Q4_K_M.gguf
+- <<: *llama3
+  name: "llama3-70b-instruct:IQ1_M"
+  overrides:
+    parameters:
+      model: Meta-Llama-3-70B-Instruct.IQ1_M.gguf
+  files:
+    - filename: Meta-Llama-3-70B-Instruct.IQ1_M.gguf
+      sha256: cdbe8ac2126a70fa0af3fac7a4fe04f1c76330c50eba8383567587b48b328098
+      uri: huggingface://MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct.IQ1_M.gguf
+- <<: *llama3
+  name: "llama3-70b-instruct:IQ1_S"
+  overrides:
+    parameters:
+      model: Meta-Llama-3-70B-Instruct.IQ1_S.gguf
+  files:
+    - filename: Meta-Llama-3-70B-Instruct.IQ1_S.gguf
+      sha256: 3797a69f1bdf53fabf9f3a3a8c89730b504dd3209406288515c9944c14093048
+      uri: huggingface://MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct.IQ1_S.gguf
 - <<: *llama3
   name: "llama-3-sauerkrautlm-8b-instruct"
   urls:

From 44bc540bb5da107c38613290d23736bf59da13bd Mon Sep 17 00:00:00 2001
From: Dave
Date: Fri, 26 Apr 2024 04:33:12 -0400
Subject: [PATCH 0383/2895] fix: security scanner dislikes `runCommand` function arguments (#2140)

runCommand ==> ffmpegCommand.
No functional changes, but makes it clear to the security scanner and future developers that this function cannot run arbitrary commands Signed-off-by: Dave Lee --- backend/go/transcribe/transcript.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/go/transcribe/transcript.go b/backend/go/transcribe/transcript.go index fdfaa974..74833e4d 100644 --- a/backend/go/transcribe/transcript.go +++ b/backend/go/transcribe/transcript.go @@ -11,8 +11,8 @@ import ( "github.com/go-skynet/LocalAI/core/schema" ) -func runCommand(command []string) (string, error) { - cmd := exec.Command(command[0], command[1:]...) +func ffmpegCommand(args []string) (string, error) { + cmd := exec.Command("ffmpeg", args...) // Constrain this to ffmpeg to permit security scanner to see that the command is safe. cmd.Env = os.Environ() out, err := cmd.CombinedOutput() return string(out), err @@ -21,8 +21,8 @@ func runCommand(command []string) (string, error) { // AudioToWav converts audio to wav for transcribe. // TODO: use https://github.com/mccoyst/ogg? func audioToWav(src, dst string) error { - command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst} - out, err := runCommand(command) + commandArgs := []string{"-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst} + out, err := ffmpegCommand(commandArgs) if err != nil { return fmt.Errorf("error: %w out: %s", err, out) } From 2cd4936c997187c7422ba36167f33323b5cf19f7 Mon Sep 17 00:00:00 2001 From: Dave Date: Fri, 26 Apr 2024 04:34:31 -0400 Subject: [PATCH 0384/2895] fix: security scanner warning noise: error handlers part 1 (#2141) first group of error handlers to reduce security scanner warning noise level Signed-off-by: Dave Lee --- core/backend/options.go | 11 ++++++++--- core/startup/startup.go | 5 ++++- embedded/embedded.go | 6 +++++- examples/semantic-todo/main.go | 5 ++++- main.go | 6 +++++- pkg/assets/list.go | 7 ++++++- pkg/grpc/server.go | 4 ++-- 7 files changed, 34 insertions(+), 10 deletions(-) diff --git a/core/backend/options.go b/core/backend/options.go index bbb9990d..4a7435e6 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -7,7 +7,8 @@ import ( "github.com/go-skynet/LocalAI/core/config" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/rs/zerolog/log" ) func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option { @@ -109,8 +110,12 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption promptCachePath := "" if c.PromptCachePath != "" { p := filepath.Join(modelPath, c.PromptCachePath) - os.MkdirAll(filepath.Dir(p), 0750) - promptCachePath = p + err := os.MkdirAll(filepath.Dir(p), 0750) + if err == nil { + promptCachePath = p + } else { + log.Error().Err(err).Str("promptCachePath", promptCachePath).Msg("error creating prompt cache folder") + } } return &pb.PredictOptions{ diff --git a/core/startup/startup.go b/core/startup/startup.go index b9e95ebf..17bbf9f5 100644 --- a/core/startup/startup.go +++ b/core/startup/startup.go @@ -122,7 +122,10 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode // Watch the configuration directory // If the directory does not exist, we don't watch it configHandler := newConfigFileHandler(options) - configHandler.Watch() + err = configHandler.Watch() + if err != nil { + 
log.Error().Err(err).Msg("error establishing configuration directory watcher") + } log.Info().Msg("core/startup process completed!") return cl, ml, options, nil diff --git a/embedded/embedded.go b/embedded/embedded.go index c779fc26..438a1352 100644 --- a/embedded/embedded.go +++ b/embedded/embedded.go @@ -7,6 +7,7 @@ import ( "strings" "github.com/go-skynet/LocalAI/pkg/downloader" + "github.com/rs/zerolog/log" "github.com/go-skynet/LocalAI/pkg/assets" "gopkg.in/yaml.v3" @@ -29,7 +30,10 @@ func ModelShortURL(s string) string { } func init() { - yaml.Unmarshal(modelLibrary, &modelShorteners) + err := yaml.Unmarshal(modelLibrary, &modelShorteners) + if err != nil { + log.Error().Err(err).Msg("error while unmarshalling embedded modelLibrary") + } } func GetRemoteLibraryShorteners(url string) (map[string]string, error) { diff --git a/examples/semantic-todo/main.go b/examples/semantic-todo/main.go index 371fe6b9..a8936ea1 100644 --- a/examples/semantic-todo/main.go +++ b/examples/semantic-todo/main.go @@ -239,7 +239,10 @@ func (app *App) updateUI() { task := Task{Description: inputField.GetText()} app.tasks = append(app.tasks, task) app.state = StateRoot - postTasksToExternalService([]Task{task}) + err := postTasksToExternalService([]Task{task}) + if err != nil { + panic(err) + } } app.updateUI() }) diff --git a/main.go b/main.go index 04f13d3f..8fb50184 100644 --- a/main.go +++ b/main.go @@ -43,7 +43,11 @@ func main() { for _, envFile := range envFiles { if _, err := os.Stat(envFile); err == nil { log.Info().Str("envFile", envFile).Msg("loading environment variables from file") - godotenv.Load(envFile) + err = godotenv.Load(envFile) + if err != nil { + log.Error().Err(err).Str("envFile", envFile).Msg("failed to load environment variables from file") + continue + } } } diff --git a/pkg/assets/list.go b/pkg/assets/list.go index 7b705b49..47e60a40 100644 --- a/pkg/assets/list.go +++ b/pkg/assets/list.go @@ -3,10 +3,12 @@ package assets import ( "embed" "io/fs" + + "github.com/rs/zerolog/log" ) func ListFiles(content embed.FS) (files []string) { - fs.WalkDir(content, ".", func(path string, d fs.DirEntry, err error) error { + err := fs.WalkDir(content, ".", func(path string, d fs.DirEntry, err error) error { if err != nil { return err } @@ -18,5 +20,8 @@ func ListFiles(content embed.FS) (files []string) { files = append(files, path) return nil }) + if err != nil { + log.Error().Err(err).Msg("error walking the embedded filesystem") + } return } diff --git a/pkg/grpc/server.go b/pkg/grpc/server.go index 8116241f..8e9f4d68 100644 --- a/pkg/grpc/server.go +++ b/pkg/grpc/server.go @@ -131,10 +131,10 @@ func (s *server) PredictStream(in *pb.PredictOptions, stream pb.Backend_PredictS done <- true }() - s.llm.PredictStream(in, resultChan) + err := s.llm.PredictStream(in, resultChan) <-done - return nil + return err } func (s *server) TokenizeString(ctx context.Context, in *pb.PredictOptions) (*pb.TokenizationResponse, error) { From 006306b183e006c35f109b3d5cb7fc059b9b4229 Mon Sep 17 00:00:00 2001 From: Dave Date: Fri, 26 Apr 2024 04:34:50 -0400 Subject: [PATCH 0385/2895] fix: use bluemonday as recommended by blackfriday (#2142) use bluemonday as recommended by blackfriday Signed-off-by: Dave Lee --- core/http/render.go | 3 ++- go.mod | 10 +++++----- go.sum | 10 ++++++++++ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/core/http/render.go b/core/http/render.go index 8f1b36c6..1becf0b2 100644 --- a/core/http/render.go +++ b/core/http/render.go @@ -10,6 +10,7 @@ import ( 
"github.com/go-skynet/LocalAI/core/schema" "github.com/gofiber/fiber/v2" fiberhtml "github.com/gofiber/template/html/v2" + "github.com/microcosm-cc/bluemonday" "github.com/russross/blackfriday" ) @@ -39,5 +40,5 @@ func renderEngine() *fiberhtml.Engine { func markDowner(args ...interface{}) template.HTML { s := blackfriday.MarkdownCommon([]byte(fmt.Sprintf("%s", args...))) - return template.HTML(s) + return template.HTML(bluemonday.UGCPolicy().Sanitize(string(s))) } diff --git a/go.mod b/go.mod index 9485383e..15846cd4 100644 --- a/go.mod +++ b/go.mod @@ -93,7 +93,7 @@ require ( github.com/golang/protobuf v1.5.3 // indirect github.com/golang/snappy v0.0.2 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect - github.com/gorilla/css v1.0.0 // indirect + github.com/gorilla/css v1.0.1 // indirect github.com/huandu/xstrings v1.3.3 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/klauspost/pgzip v1.2.5 // indirect @@ -133,9 +133,9 @@ require ( github.com/yuin/goldmark-emoji v1.0.2 // indirect go.opentelemetry.io/otel/sdk v1.19.0 // indirect go.opentelemetry.io/otel/trace v1.19.0 // indirect - golang.org/x/crypto v0.21.0 // indirect + golang.org/x/crypto v0.22.0 // indirect golang.org/x/mod v0.16.0 // indirect - golang.org/x/term v0.18.0 // indirect + golang.org/x/term v0.19.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect gopkg.in/fsnotify.v1 v1.4.7 // indirect gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect @@ -161,8 +161,8 @@ require ( github.com/rivo/uniseg v0.2.0 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/tcplisten v1.0.0 // indirect - golang.org/x/net v0.22.0 // indirect - golang.org/x/sys v0.18.0 // indirect + golang.org/x/net v0.24.0 // indirect + golang.org/x/sys v0.19.0 // indirect golang.org/x/text v0.14.0 // indirect golang.org/x/tools v0.19.0 // indirect ) diff --git a/go.sum b/go.sum index b68834b2..1ca56a72 100644 --- a/go.sum +++ b/go.sum @@ -146,6 +146,8 @@ github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY= github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c= +github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8= +github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0= github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= @@ -377,6 +379,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= +golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= +golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod 
h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= @@ -395,6 +399,8 @@ golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc= golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= +golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -431,12 +437,16 @@ golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= +golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= +golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q= +golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= From c9451cb604d9325fadf678e1e511d7e8e5367e8a Mon Sep 17 00:00:00 2001 From: fakezeta Date: Fri, 26 Apr 2024 16:20:43 +0200 Subject: [PATCH 0386/2895] Bump oneapi-basekit, optimum and openvino (#2139) * Bump oneapi-basekit, optimum and openvino * Changed PERFORMANCE_HINT to CUMULATIVE_THROUGHPUT. Minor latency change for the first token, but about a 10-15% speedup on token generation.
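The functional part of this bump is the OpenVINO performance hint passed when the model is compiled. A minimal sketch of the load call the patched server ends up making — it mirrors the one-line ov_config change to backend/python/transformers/transformers_server.py shown below, and assumes the optimum[openvino]==1.19.1 / openvino==2024.1.0 pins from transformers.yml; the model id and device are placeholders, not values from this patch:

```python
# Illustrative sketch only: mirrors the ov_config change this commit makes in
# transformers_server.py. Assumes optimum-intel as pinned in transformers.yml.
from optimum.intel import OVModelForCausalLM

model = OVModelForCausalLM.from_pretrained(
    "example-org/example-llm",  # placeholder model id
    compile=True,
    # Previously {"PERFORMANCE_HINT": "LATENCY"}; CUMULATIVE_THROUGHPUT trades
    # a small first-token latency hit for ~10-15% faster token generation,
    # per the commit message above.
    ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
    device="CPU",  # placeholder; the server passes its computed device_map
)
```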
--- .github/workflows/image-pr.yml | 4 ++-- .github/workflows/image.yml | 12 ++++++------ Makefile | 4 ++-- .../python/common-env/transformers/transformers.yml | 7 ++++--- backend/python/transformers/transformers_server.py | 2 +- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 9c4fece7..3df5cd5a 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -68,7 +68,7 @@ jobs: - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' - base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04" grpc-base-image: "ubuntu:22.04" tag-suffix: 'sycl-f16-ffmpeg' ffmpeg: 'true' @@ -110,7 +110,7 @@ jobs: - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' - base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04" grpc-base-image: "ubuntu:22.04" tag-suffix: 'sycl-f16-ffmpeg-core' ffmpeg: 'true' diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 255c1c65..43b7052e 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -148,7 +148,7 @@ jobs: - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'auto' - base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04" grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f16-ffmpeg' ffmpeg: 'true' @@ -161,7 +161,7 @@ jobs: - build-type: 'sycl_f32' platforms: 'linux/amd64' tag-latest: 'auto' - base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04" grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f32-ffmpeg' ffmpeg: 'true' @@ -175,7 +175,7 @@ jobs: - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' - base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04" grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f16-core' ffmpeg: 'false' @@ -185,7 +185,7 @@ jobs: - build-type: 'sycl_f32' platforms: 'linux/amd64' tag-latest: 'false' - base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04" grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f32-core' ffmpeg: 'false' @@ -195,7 +195,7 @@ jobs: - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' - base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04" grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f16-ffmpeg-core' ffmpeg: 'true' @@ -205,7 +205,7 @@ jobs: - build-type: 'sycl_f32' platforms: 'linux/amd64' tag-latest: 'false' - base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04" grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f32-ffmpeg-core' ffmpeg: 'true' diff --git a/Makefile b/Makefile index 7d64ad03..3ebe13f3 100644 --- a/Makefile +++ b/Makefile @@ -707,7 +707,7 @@ docker-aio-all: docker-image-intel: docker build \ - --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \ + --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="none" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ @@ -715,7 +715,7 @@ docker-image-intel: docker-image-intel-xpu: 
docker build \ - --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \ + --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="none" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ diff --git a/backend/python/common-env/transformers/transformers.yml b/backend/python/common-env/transformers/transformers.yml index 5c069dd0..5f4e85b9 100644 --- a/backend/python/common-env/transformers/transformers.yml +++ b/backend/python/common-env/transformers/transformers.yml @@ -60,9 +60,10 @@ dependencies: - networkx - numpy==1.26.0 - onnx==1.15.0 - - openvino==2024.0.0 - - openvino-telemetry==2023.2.1 - - optimum[openvino]==1.17.1 + - openvino==2024.1.0 + - openvino-telemetry==2024.1.0 + - optimum[openvino]==1.19.1 + - optimum-intel==1.16.1 - packaging==23.2 - pandas - peft==0.5.0 diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index 2f4140c2..a27c24da 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -150,7 +150,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): self.model = OVModelForCausalLM.from_pretrained(model_name, compile=True, trust_remote_code=request.TrustRemoteCode, - ov_config={"PERFORMANCE_HINT": "LATENCY"}, + ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"}, device=device_map) self.OV = True else: From 2dc1fa247424ad962cdddbc1d440ae7c56c32095 Mon Sep 17 00:00:00 2001 From: Dave Date: Fri, 26 Apr 2024 12:46:35 -0400 Subject: [PATCH 0387/2895] fix: `config_file_watcher.go` - root all file reads for safety (#2144) callHandler() now has all file access rooted within DynamicConfigDir Signed-off-by: Dave Lee --- core/startup/config_file_watcher.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/core/startup/config_file_watcher.go b/core/startup/config_file_watcher.go index 800059d0..6bbb367f 100644 --- a/core/startup/config_file_watcher.go +++ b/core/startup/config_file_watcher.go @@ -21,7 +21,6 @@ type configFileHandler struct { watcher *fsnotify.Watcher - configDir string appConfig *config.ApplicationConfig } @@ -30,7 +29,6 @@ type configFileHandler struct { func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler { c := configFileHandler{ handlers: make(map[string]fileHandler), - configDir: appConfig.DynamicConfigsDir, appConfig: appConfig, } c.Register("api_keys.json", readApiKeysJson(*appConfig), true) @@ -45,16 +43,17 @@ func (c *configFileHandler) Register(filename string, handler fileHandler, runNo } c.handlers[filename] = handler if runNow { - c.callHandler(path.Join(c.appConfig.DynamicConfigsDir, filename), handler) + c.callHandler(filename, handler) } return nil } func (c *configFileHandler) callHandler(filename string, handler fileHandler) { - log.Trace().Str("filename", filename).Msg("reading file for dynamic config update") - fileContent, err := os.ReadFile(filename) + rootedFilePath := filepath.Join(c.appConfig.DynamicConfigsDir, filepath.Clean(filename)) + log.Trace().Str("filename", rootedFilePath).Msg("reading file for dynamic config update") + fileContent, err := os.ReadFile(rootedFilePath) if err != nil && !os.IsNotExist(err) { - log.Error().Err(err).Str("filename", filename).Msg("could not read file") + log.Error().Err(err).Str("filename", rootedFilePath).Msg("could not read file") } if err = handler(fileContent, c.appConfig); err != nil { @@ -66,7 +65,8 @@ 
func (c *configFileHandler) Watch() error { configWatcher, err := fsnotify.NewWatcher() c.watcher = configWatcher if err != nil { - log.Fatal().Err(err).Str("configdir", c.configDir).Msg("wnable to create a watcher for configuration directory") + log.Fatal().Err(err).Str("configdir", c.appConfig.DynamicConfigsDir).Msg("unable to create a watcher for configuration directory") + } if c.appConfig.DynamicConfigsDirPollInterval > 0 { @@ -77,7 +77,7 @@ func (c *configFileHandler) Watch() error { <-ticker.C for file, handler := range c.handlers { log.Debug().Str("file", file).Msg("polling config file") - c.callHandler(filepath.Join(c.appConfig.DynamicConfigsDir, file), handler) + c.callHandler(file, handler) } } }() @@ -97,7 +97,7 @@ func (c *configFileHandler) Watch() error { continue } - c.callHandler(event.Name, handler) + c.callHandler(filepath.Base(event.Name), handler) } case err, ok := <-c.watcher.Errors: log.Error().Err(err).Msg("config watcher error received") From 56d843c263ce72409baefe2201130737c052239d Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 27 Apr 2024 01:03:10 +0200 Subject: [PATCH 0388/2895] :arrow_up: Update docs version mudler/LocalAI (#2149) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index 6a618115..ad22e2aa 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.12.4" + "version": " 🖼️ v2.13.0 - Model gallery edition" } From 030d55599562fbe2586760e93eb47fe58631e60c Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 27 Apr 2024 04:18:28 +0200 Subject: [PATCH 0389/2895] :arrow_up: Update ggerganov/llama.cpp (#2150) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3ebe13f3..15aea6ce 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=46e12c4692a37bdd31a0432fc5153d7d22bc7f72 +CPPLLAMA_VERSION?=928e0b7013c862cf10701957b3d654aa70f11bd8 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From fb2a05ff43b6ce70835c9fd29eccb0fa76ac8da5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Apr 2024 09:08:33 +0200 Subject: [PATCH 0390/2895] feat(gallery): display job status also during navigation (#2151) * feat(gallery): keep showing progress also when refreshing Signed-off-by: Ettore Di Giacinto * fix(intel-gpu): better defaults Signed-off-by: Ettore Di Giacinto * feat: make it thread-safe Signed-off-by: mudler --------- Signed-off-by: Ettore Di Giacinto Signed-off-by: mudler --- core/config/backend_config.go | 8 +++- core/http/elements/gallery.go | 19 ++++++++- core/http/routes/ui.go | 30 +++++++++++--- pkg/xsync/map.go | 77 +++++++++++++++++++++++++++++++++++ pkg/xsync/map_test.go | 26 ++++++++++++ pkg/xsync/sync_suite_test.go | 13 ++++++ 6 files changed, 164 insertions(+), 9 deletions(-) create mode 100644 pkg/xsync/map.go create mode 100644 pkg/xsync/map_test.go create mode 100644 pkg/xsync/sync_suite_test.go diff 
--git a/core/config/backend_config.go b/core/config/backend_config.go index 64182e75..35e0776d 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -238,7 +238,13 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { if cfg.MMap == nil { // MMap is enabled by default - cfg.MMap = &trueV + + // Only exception is for Intel GPUs + if os.Getenv("XPU") != "" { + cfg.MMap = &falseV + } else { + cfg.MMap = &trueV + } } if cfg.MMlock == nil { diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index c03750da..6edbd23d 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -6,6 +6,7 @@ import ( "github.com/chasefleming/elem-go" "github.com/chasefleming/elem-go/attrs" "github.com/go-skynet/LocalAI/pkg/gallery" + "github.com/go-skynet/LocalAI/pkg/xsync" ) const ( @@ -102,7 +103,8 @@ func cardSpan(text, icon string) elem.Node { ) } -func ListModels(models []*gallery.GalleryModel) string { +func ListModels(models []*gallery.GalleryModel, installing *xsync.SyncedMap[string, string]) string { + //StartProgressBar(uid, "0") modelsElements := []elem.Node{} span := func(s string) elem.Node { return elem.Span( @@ -118,6 +120,7 @@ func ListModels(models []*gallery.GalleryModel) string { "data-twe-ripple-init": "", "data-twe-ripple-color": "light", "class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "hx-swap": "outerHTML", // post the Model ID as param "hx-post": "/browse/install/model/" + fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name), }, @@ -152,6 +155,9 @@ func ListModels(models []*gallery.GalleryModel) string { } actionDiv := func(m *gallery.GalleryModel) elem.Node { + galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name) + currentlyInstalling := installing.Exists(galleryID) + nodes := []elem.Node{ cardSpan("Repository: "+m.Gallery.Name, "fa-brands fa-git-alt"), } @@ -193,7 +199,16 @@ func ListModels(models []*gallery.GalleryModel) string { }, nodes..., ), - elem.If(m.Installed, span("Installed"), installButton(m)), + elem.If( + currentlyInstalling, + elem.Node( // If currently installing, show progress bar + elem.Raw(StartProgressBar(installing.Get(galleryID), "0")), + ), // Otherwise, show install button (if not installed) or display "Installed" + elem.If(m.Installed, + span("Installed"), + installButton(m), + ), + ), ) } diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index c64ec5ff..b63b1870 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -10,6 +10,8 @@ import ( "github.com/go-skynet/LocalAI/core/services" "github.com/go-skynet/LocalAI/pkg/gallery" "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/xsync" + "github.com/gofiber/fiber/v2" "github.com/google/uuid" ) @@ -21,13 +23,16 @@ func RegisterUIRoutes(app *fiber.App, galleryService *services.GalleryService, auth func(*fiber.Ctx) error) { - // Show the Models page + // keeps the state of models that are being installed from the UI + var installingModels = xsync.NewSyncedMap[string, string]() + + // Show the Models page (all models) app.Get("/browse", auth, func(c 
*fiber.Ctx) error { models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) summary := fiber.Map{ "Title": "LocalAI - Models", - "Models": template.HTML(elements.ListModels(models)), + "Models": template.HTML(elements.ListModels(models, installingModels)), "Repositories": appConfig.Galleries, // "ApplicationConfig": appConfig, } @@ -36,7 +41,7 @@ func RegisterUIRoutes(app *fiber.App, return c.Render("views/models", summary) }) - // HTMX: return the model details + // Show the models, filtered from the user input // https://htmx.org/examples/active-search/ app.Post("/browse/search/models", auth, func(c *fiber.Ctx) error { form := struct { @@ -58,12 +63,13 @@ func RegisterUIRoutes(app *fiber.App, } } - return c.SendString(elements.ListModels(filteredModels)) + return c.SendString(elements.ListModels(filteredModels, installingModels)) }) + // This route is used when the "Install" button is pressed, we submit here a new job to the gallery service // https://htmx.org/examples/progress-bar/ app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error { - galleryID := strings.Clone(c.Params("id")) // strings.Clone is required! + galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! id, err := uuid.NewUUID() if err != nil { @@ -72,6 +78,8 @@ func RegisterUIRoutes(app *fiber.App, uid := id.String() + installingModels.Set(galleryID, uid) + op := gallery.GalleryOp{ Id: uid, GalleryName: galleryID, @@ -84,6 +92,8 @@ func RegisterUIRoutes(app *fiber.App, return c.SendString(elements.StartProgressBar(uid, "0")) }) + // Display the job current progress status + // If the job is done, we trigger the /browse/job/:uid route // https://htmx.org/examples/progress-bar/ app.Get("/browse/job/progress/:uid", auth, func(c *fiber.Ctx) error { jobUID := c.Params("uid") @@ -95,7 +105,7 @@ func RegisterUIRoutes(app *fiber.App, } if status.Progress == 100 { - c.Set("HX-Trigger", "done") + c.Set("HX-Trigger", "done") // this triggers /browse/job/:uid (which is when the job is done) return c.SendString(elements.ProgressBar("100")) } if status.Error != nil { @@ -105,7 +115,15 @@ func RegisterUIRoutes(app *fiber.App, return c.SendString(elements.ProgressBar(fmt.Sprint(status.Progress))) }) + // this route is hit when the job is done, and we display the + // final state (for now just displays "Installation completed") app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error { + for _, k := range installingModels.Keys() { + if installingModels.Get(k) == c.Params("uid") { + installingModels.Delete(k) + } + } + return c.SendString(elements.DoneProgress(c.Params("uid"))) }) } diff --git a/pkg/xsync/map.go b/pkg/xsync/map.go new file mode 100644 index 00000000..9c3a471e --- /dev/null +++ b/pkg/xsync/map.go @@ -0,0 +1,77 @@ +package xsync + +import ( + "sync" +) + +type SyncedMap[K comparable, V any] struct { + mu sync.RWMutex + m map[K]V +} + +func NewSyncedMap[K comparable, V any]() *SyncedMap[K, V] { + return &SyncedMap[K, V]{ + m: make(map[K]V), + } +} + +func (m *SyncedMap[K, V]) Get(key K) V { + m.mu.RLock() + defer m.mu.RUnlock() + return m.m[key] +} + +func (m *SyncedMap[K, V]) Keys() []K { + m.mu.RLock() + defer m.mu.RUnlock() + keys := make([]K, 0, len(m.m)) + for k := range m.m { + keys = append(keys, k) + } + return keys +} + +func (m *SyncedMap[K, V]) Values() []V { + m.mu.RLock() + defer m.mu.RUnlock() + values := make([]V, 0, len(m.m)) + for _, v := range m.m { + values = append(values, v) + } + return values +} + +func (m 
*SyncedMap[K, V]) Len() int { + m.mu.RLock() + defer m.mu.RUnlock() + return len(m.m) +} + +func (m *SyncedMap[K, V]) Iterate(f func(key K, value V) bool) { + m.mu.RLock() + defer m.mu.RUnlock() + for k, v := range m.m { + if !f(k, v) { + break + } + } +} + +func (m *SyncedMap[K, V]) Set(key K, value V) { + m.mu.Lock() + m.m[key] = value + m.mu.Unlock() +} + +func (m *SyncedMap[K, V]) Delete(key K) { + m.mu.Lock() + delete(m.m, key) + m.mu.Unlock() +} + +func (m *SyncedMap[K, V]) Exists(key K) bool { + m.mu.RLock() + defer m.mu.RUnlock() + _, ok := m.m[key] + return ok +} diff --git a/pkg/xsync/map_test.go b/pkg/xsync/map_test.go new file mode 100644 index 00000000..a7ecfbcc --- /dev/null +++ b/pkg/xsync/map_test.go @@ -0,0 +1,26 @@ +package xsync_test + +import ( + . "github.com/go-skynet/LocalAI/pkg/xsync" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("SyncMap", func() { + + Context("Syncmap", func() { + It("sets and gets", func() { + m := NewSyncedMap[string, string]() + m.Set("foo", "bar") + Expect(m.Get("foo")).To(Equal("bar")) + }) + It("deletes", func() { + m := NewSyncedMap[string, string]() + m.Set("foo", "bar") + m.Delete("foo") + Expect(m.Get("foo")).To(Equal("")) + Expect(m.Exists("foo")).To(Equal(false)) + }) + }) +}) diff --git a/pkg/xsync/sync_suite_test.go b/pkg/xsync/sync_suite_test.go new file mode 100644 index 00000000..0dad9c66 --- /dev/null +++ b/pkg/xsync/sync_suite_test.go @@ -0,0 +1,13 @@ +package xsync_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestSync(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "LocalAI sync test") +} From 4c97406f2b3f10b68165caa7a4dfe09f01dc2cf0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Apr 2024 12:30:15 +0200 Subject: [PATCH 0391/2895] models(gallery): add Einstein v6.1 (#2152) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 9cc72d79..cf4e332a 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -31,6 +31,29 @@ - python ## LLMs ### START LLAMA3 +- name: "einstein-v6.1-llama3-8b" + url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" + icon: https://cdn-uploads.huggingface.co/production/uploads/6468ce47e134d050a58aa89c/5s12oq859qLfDkkTNam_C.png + urls: + - https://huggingface.co/Weyaxi/Einstein-v6.1-Llama3-8B + tags: + - llm + - gguf + - gpu + - cpu + - llama3 + license: llama3 + description: | + This model is a full fine-tuned version of meta-llama/Meta-Llama-3-8B on diverse datasets. + + This model is finetuned using 8xRTX3090 + 1xRTXA6000 using axolotl. 
+ overrides: + parameters: + model: Einstein-v6.1-Llama3-8B-Q4_K_M.gguf + files: + - filename: Einstein-v6.1-Llama3-8B-Q4_K_M.gguf + sha256: 3ef96fd6e32658774b3c8fbc24088787dfa911288e272b186f448c886400d30d + uri: huggingface://bartowski/Einstein-v6.1-Llama3-8B-GGUF/Einstein-v6.1-Llama3-8B-Q4_K_M.gguf - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" name: "llama3-8b-instruct" From 935f4c23f64c79d21bf44c5821877bbc382d811d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Apr 2024 17:53:56 +0200 Subject: [PATCH 0392/2895] models(gallery): add SOVL (#2154) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index cf4e332a..2162ba52 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -236,6 +236,22 @@ - filename: Chaos_RP_l3_8B-Q4_K_M-imat.gguf sha256: 4273c5a8f23d49bf6294e620a5aa1fcd78d491ea0b90d0ec63ad708eedb83893 uri: huggingface://Lewdiculous/Chaos_RP_l3_8B-GGUF-IQ-Imatrix/Chaos_RP_l3_8B-Q4_K_M-imat.gguf +- <<: *llama3 + name: "sovl_llama3_8b-gguf-iq-imatrix" + urls: + - https://huggingface.co/Lewdiculous/SOVL_Llama3_8B-GGUF-IQ-Imatrix + icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/N_1D87adbMuMlSIQ5rI3_.png + description: | + I'm not gonna tell you this is the best model anyone has ever made. I'm not going to tell you that you will love chatting with SOVL. + + What I am gonna say is thank you for taking the time out of your day. Without users like you, my work would be meaningless. + overrides: + parameters: + model: SOVL_Llama3_8B-Q4_K_M-imat.gguf + files: + - filename: SOVL_Llama3_8B-Q4_K_M-imat.gguf + sha256: ee61890dd26d52985a3c44279d519ca8592448ddeb46387cf22868548703d686 + uri: huggingface://Lewdiculous/SOVL_Llama3_8B-GGUF-IQ-Imatrix/SOVL_Llama3_8B-Q4_K_M-imat.gguf - <<: *llama3 name: "llama-3-unholy-8b:Q8_0" urls: From 1f8461767d0b728cbee1805ebeffa0b59d58f6a8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Apr 2024 17:59:31 +0200 Subject: [PATCH 0393/2895] models(gallery): add average_normie (#2155) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 2162ba52..4d26030c 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -252,6 +252,34 @@ - filename: SOVL_Llama3_8B-Q4_K_M-imat.gguf sha256: ee61890dd26d52985a3c44279d519ca8592448ddeb46387cf22868548703d686 uri: huggingface://Lewdiculous/SOVL_Llama3_8B-GGUF-IQ-Imatrix/SOVL_Llama3_8B-Q4_K_M-imat.gguf +- <<: *llama3 + name: "average_normie_l3_v1_8b-gguf-iq-imatrix" + urls: + - https://huggingface.co/Lewdiculous/Average_Normie_l3_v1_8B-GGUF-IQ-Imatrix + icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/dvNIj1rSTjBvgs3XJfqXK.png + description: | + A model by an average normie for the average normie. + + This model is a stock merge of the following models: + + https://huggingface.co/cgato/L3-TheSpice-8b-v0.1.3 + + https://huggingface.co/Sao10K/L3-Solana-8B-v1 + + https://huggingface.co/ResplendentAI/Kei_Llama3_8B + + The final merge then had the following LoRA applied over it: + + https://huggingface.co/ResplendentAI/Theory_of_Mind_Llama3 + + This should be an intelligent and adept roleplaying model. 
+ overrides: + parameters: + model: Average_Normie_l3_v1_8B-Q4_K_M-imat.gguf + files: + - filename: Average_Normie_l3_v1_8B-Q4_K_M-imat.gguf + sha256: 9e98cd2672f716a0872912fdc4877969efd14d6f682f28e156f8591591c00d9c + uri: huggingface://Lewdiculous/Average_Normie_l3_v1_8B-GGUF-IQ-Imatrix/Average_Normie_l3_v1_8B-Q4_K_M-imat.gguf - <<: *llama3 name: "llama-3-unholy-8b:Q8_0" urls: From 164be58445066b8756e7a0eca96290fa2a63fc42 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Apr 2024 18:10:58 +0200 Subject: [PATCH 0394/2895] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 343a7cf5..7fe1a598 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,8 @@ - All-in-one container image: https://github.com/mudler/LocalAI/issues/1855 Hot topics (looking for contributors): + +- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156 - Backends v2: https://github.com/mudler/LocalAI/issues/1126 - Improving UX v2: https://github.com/mudler/LocalAI/issues/1373 - Assistant API: https://github.com/mudler/LocalAI/issues/1273 From 9fc013599118dc49dd77b0993864f14d6dcb7836 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Sat, 27 Apr 2024 12:48:20 -0500 Subject: [PATCH 0395/2895] feat: cleanup Dockerfile and make final image a little smaller (#2146) * feat: cleanup Dockerfile and make final image a little smaller Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: add build-essential to final stage Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: more GRPC cache misses Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: correct for another cause of GRPC cache misses Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: generate new GRPC cache automatically if needed Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: use new GRPC_MAKEFLAGS build arg in GRPC cache generation Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --------- Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- .github/workflows/generate_grpc_cache.yaml | 8 +- .github/workflows/image_build.yml | 9 +- Dockerfile | 144 ++++++++++++++------- 3 files changed, 112 insertions(+), 49 deletions(-) diff --git a/.github/workflows/generate_grpc_cache.yaml b/.github/workflows/generate_grpc_cache.yaml index c6b080b5..deda6084 100644 --- a/.github/workflows/generate_grpc_cache.yaml +++ b/.github/workflows/generate_grpc_cache.yaml @@ -1,7 +1,10 @@ name: 'generate and publish GRPC docker caches' on: -- workflow_dispatch + workflow_dispatch: + push: + branches: + - master concurrency: group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }} @@ -80,11 +83,12 @@ jobs: # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch. build-args: | GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }} - MAKEFLAGS=--jobs=4 --output-sync=target + GRPC_MAKEFLAGS=--jobs=4 --output-sync=target GRPC_VERSION=v1.58.0 context: . 
file: ./Dockerfile cache-to: type=gha,ignore-error=true + cache-from: type=gha target: grpc platforms: ${{ matrix.platforms }} push: false \ No newline at end of file diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index b06100ff..fb1985fd 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -210,7 +210,7 @@ jobs: # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch. build-args: | GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} - MAKEFLAGS=--jobs=4 --output-sync=target + GRPC_MAKEFLAGS=--jobs=4 --output-sync=target GRPC_VERSION=v1.58.0 context: . file: ./Dockerfile @@ -225,6 +225,10 @@ jobs: uses: docker/build-push-action@v5 with: builder: ${{ steps.buildx.outputs.name }} + # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache. + # This means that even the MAKEFLAGS have to be an EXACT match. + # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch. + # This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded build-args: | BUILD_TYPE=${{ inputs.build-type }} CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }} @@ -232,6 +236,9 @@ jobs: FFMPEG=${{ inputs.ffmpeg }} IMAGE_TYPE=${{ inputs.image-type }} BASE_IMAGE=${{ inputs.base-image }} + GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} + GRPC_MAKEFLAGS=--jobs=4 --output-sync=target + GRPC_VERSION=v1.58.0 MAKEFLAGS=${{ inputs.makeflags }} context: . file: ./Dockerfile diff --git a/Dockerfile b/Dockerfile index 4d12cb56..717b3a3a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,22 +21,22 @@ ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface ARG GO_TAGS="stablediffusion tinydream tts" RUN apt-get update && \ - apt-get install -y ca-certificates curl python3-pip unzip && apt-get clean + apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + python3-pip \ + unzip && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* # Install Go RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz -ENV PATH $PATH:/usr/local/go/bin +ENV PATH $PATH:/root/go/bin:/usr/local/go/bin # Install grpc compilers -ENV PATH $PATH:/root/go/bin RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest -# Install protobuf (the version in 22.04 is too old) -RUN curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \ - unzip -j -d /usr/local/bin protoc.zip bin/protoc && \ - rm protoc.zip - # Install grpcio-tools (the version in 22.04 is too old) RUN pip install --user grpcio-tools @@ -49,12 +49,21 @@ RUN echo "Target Variant: $TARGETVARIANT" # CuBLAS requirements RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ - apt-get install -y software-properties-common && \ - curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \ - dpkg -i cuda-keyring_1.1-1_all.deb && \ - rm -f cuda-keyring_1.1-1_all.deb && \ - apt-get update && \ - apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} 
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + software-properties-common && \ + curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \ + dpkg -i cuda-keyring_1.1-1_all.deb && \ + rm -f cuda-keyring_1.1-1_all.deb && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ + libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ + libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ + libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ + libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* \ ; fi # Cuda @@ -64,10 +73,12 @@ ENV PATH /usr/local/cuda/bin:${PATH} ENV PATH /opt/rocm/bin:${PATH} # OpenBLAS requirements and stable diffusion -RUN apt-get install -y \ - libopenblas-dev \ - libopencv-dev \ - && apt-get clean +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libopenblas-dev \ + libopencv-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* # Set up OpenCV RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 @@ -82,24 +93,37 @@ RUN test -n "$TARGETARCH" \ FROM requirements-core AS requirements-extras -RUN apt install -y gpg && \ +RUN apt-get update && \ + apt-get install -y --no-install-recommends gpg && \ curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \ echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list && \ apt-get update && \ - apt-get install -y conda && apt-get clean + apt-get install -y --no-install-recommends \ + conda && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* ENV PATH="/root/.cargo/bin:${PATH}" -RUN apt-get install -y python3-pip && apt-get clean -RUN pip install --upgrade pip +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + python3-pip && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* && \ + pip install --upgrade pip RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y -RUN apt-get install -y espeak-ng espeak && apt-get clean +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + espeak-ng \ + espeak && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* RUN if [ ! -e /usr/bin/python ]; then \ - ln -s /usr/bin/python3 /usr/bin/python \ + ln -s /usr/bin/python3 /usr/bin/python \ ; fi ################################### @@ -107,15 +131,20 @@ RUN if [ ! 
-e /usr/bin/python ]; then \ FROM ${GRPC_BASE_IMAGE} AS grpc -ARG MAKEFLAGS +# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI +ARG GRPC_MAKEFLAGS="-j4 -Otarget" ARG GRPC_VERSION=v1.58.0 -ENV MAKEFLAGS=${MAKEFLAGS} +ENV MAKEFLAGS=${GRPC_MAKEFLAGS} WORKDIR /build RUN apt-get update && \ - apt-get install -y build-essential cmake git && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + build-essential \ + cmake \ + git && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -123,8 +152,12 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall WORKDIR /build/grpc/cmake/build -RUN cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \ - make +# We install GRPC to a different prefix here so that we can copy in only the build artifacts later +# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree +# and running make install in the target container +RUN cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \ + make && \ + make install ################################### ################################### @@ -149,7 +182,10 @@ COPY .git . RUN echo "GO_TAGS: $GO_TAGS" RUN apt-get update && \ - apt-get install -y build-essential cmake git && \ + apt-get install -y --no-install-recommends \ + build-essential \ + cmake \ + git && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -157,26 +193,33 @@ RUN make prepare # If we are building with clblas support, we need the libraries for the builds RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \ - apt-get update && \ - apt-get install -y libclblast-dev && \ - apt-get clean \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + libclblast-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* \ ; fi +# We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below +# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only +# here so that we can generate the grpc code for the stablediffusion build +RUN curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \ + unzip -j -d /usr/local/bin protoc.zip bin/protoc && \ + rm protoc.zip + # stablediffusion does not tolerate a newer version of abseil, build it first RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build -COPY --from=grpc /build/grpc ./grpc/ - -WORKDIR /build/grpc/cmake/build -RUN make install +# Install the pre-built GRPC +COPY --from=grpc /opt/grpc /usr/local # Rebuild with defaults backends WORKDIR /build RUN make build RUN if [ ! 
-d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \ - mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \ - touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \ + mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \ + touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \ ; fi ################################### @@ -203,18 +246,27 @@ ENV PIP_CACHE_PURGE=true # Add FFmpeg RUN if [ "${FFMPEG}" = "true" ]; then \ - apt-get install -y ffmpeg && apt-get clean \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + ffmpeg && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* \ ; fi # Add OpenCL RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \ - apt-get update && \ - apt-get install -y libclblast1 && \ - apt-get clean \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + libclblast1 && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* \ ; fi RUN apt-get update && \ - apt-get install -y cmake git && \ + apt-get install -y --no-install-recommends \ + build-essential \ + cmake \ + git && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -227,9 +279,9 @@ WORKDIR /build COPY . . COPY --from=builder /build/sources ./sources/ -COPY --from=grpc /build/grpc ./grpc/ +COPY --from=grpc /opt/grpc /usr/local -RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf /build/grpc +RUN make prepare-sources # Copy the binary COPY --from=builder /build/local-ai ./ From 7e6bf6e7a177848df28e5e0cdfb39b94a43c8c4b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Apr 2024 19:52:26 +0200 Subject: [PATCH 0396/2895] ci: add auto-label rule for gallery in labeler.yml Signed-off-by: Ettore Di Giacinto --- .github/labeler.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/labeler.yml b/.github/labeler.yml index 64a88f43..687a90d1 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -8,6 +8,11 @@ kind/documentation: - changed-files: - any-glob-to-any-file: '*.md' +area/ai-model: +- any: + - changed-files: + - any-glob-to-any-file: 'gallery/*' + examples: - any: - changed-files: @@ -16,4 +21,4 @@ examples: ci: - any: - changed-files: - - any-glob-to-any-file: '.github/*' \ No newline at end of file + - any-glob-to-any-file: '.github/*' From c3982212f9946ab3a1d92e1444bea871febdae0c Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 27 Apr 2024 23:32:43 +0200 Subject: [PATCH 0397/2895] :arrow_up: Update ggerganov/llama.cpp (#2159) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 15aea6ce..6ef6e9ab 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=928e0b7013c862cf10701957b3d654aa70f11bd8 +CPPLLAMA_VERSION?=4dba7e8114d84241c842b986e008af8b88d1a019 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 773d5d23d59c7215e5bf55df06ee937652ae5d9b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Apr 2024 11:57:22 +0200 Subject: [PATCH 0398/2895] models(gallery): add solana (#2157) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 
4d26030c..2ba1558f 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -252,6 +252,23 @@ - filename: SOVL_Llama3_8B-Q4_K_M-imat.gguf sha256: ee61890dd26d52985a3c44279d519ca8592448ddeb46387cf22868548703d686 uri: huggingface://Lewdiculous/SOVL_Llama3_8B-GGUF-IQ-Imatrix/SOVL_Llama3_8B-Q4_K_M-imat.gguf +- <<: *llama3 + name: "l3-solana-8b-v1-gguf" + url: "github:mudler/LocalAI/gallery/solana.yaml@master" + license: cc-by-nc-4.0 + urls: + - https://huggingface.co/Sao10K/L3-Solana-8B-v1-GGUF + description: | + A Full Fine-Tune of meta-llama/Meta-Llama-3-8B done with 2x A100 80GB on ~75M Tokens worth of Instruct, and Multi-Turn complex conversations, of up to 8192 tokens long sequence lengths. + + Trained as a generalist instruct model that should be able to handle certain unsavoury topics. It could roleplay too, as a side bonus. + overrides: + parameters: + model: L3-Solana-8B-v1.q5_K_M.gguf + files: + - filename: L3-Solana-8B-v1.q5_K_M.gguf + sha256: 9b8cd2c3beaab5e4f82efd10e7d44f099ad40a4e0ee286ca9fce02c8eec26d2f + uri: huggingface://Sao10K/L3-Solana-8B-v1-GGUF/L3-Solana-8B-v1.q5_K_M.gguf - <<: *llama3 name: "average_normie_l3_v1_8b-gguf-iq-imatrix" urls: From 0f0ae13ad05d60312dc6ff72bf53f05d548d6daf Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Apr 2024 12:01:01 +0200 Subject: [PATCH 0399/2895] models(gallery): add poppy porpoise (#2158) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 2ba1558f..fdea1d9e 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -495,6 +495,32 @@ - filename: llava-v1.5-7b-mmproj-Q8_0.gguf sha256: 09c230de47f6f843e4841656f7895cac52c6e7ec7392acb5e8527de8b775c45a uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf +- <<: *llama3 + name: "poppy_porpoise-v0.7-l3-8b-iq-imatrix" + description: | + "Poppy Porpoise" is a cutting-edge AI roleplay assistant based on the Llama 3 8B model, specializing in crafting unforgettable narrative experiences. With its advanced language capabilities, Poppy expertly immerses users in an interactive and engaging adventure, tailoring each adventure to their individual preferences. + + Update: Vision/multimodal capabilities again! 
+ icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/v6AZmbk-Cb52KskTQTwzW.png + tags: + - llm + - multimodal + - gguf + - gpu + - llama3 + - cpu + - llava-1.5 + overrides: + mmproj: Llava_1.5_Llama3_mmproj.gguf + parameters: + model: Poppy_Porpoise-v0.7-L3-8B-Q4_K_M-imat.gguf + files: + - filename: Poppy_Porpoise-v0.7-L3-8B-Q4_K_M-imat.gguf + sha256: 04badadd6c88cd9c706efef8f5cd337057c805e43dd440a5936f87720c37eb33 + uri: huggingface://Lewdiculous/Poppy_Porpoise-v0.7-L3-8B-GGUF-IQ-Imatrix/Poppy_Porpoise-v0.7-L3-8B-Q4_K_M-imat.gguf + - filename: Llava_1.5_Llama3_mmproj.gguf + sha256: d2a9ca943975f6c49c4d55886e873f676a897cff796e92410ace6c20f4efd03b + uri: huggingface://ChaoticNeutrals/Llava_1.5_Llama3_mmproj/mmproj-model-f16.gguf ### START Phi-2 - &phi-2-chat url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master" From a248ede222bf5d3761a91fae827be4489d5590d6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Apr 2024 12:02:50 +0200 Subject: [PATCH 0400/2895] models(gallery): add Undi95/Llama-3-LewdPlay-8B-evo-GGUF (#2160) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index fdea1d9e..d4e8d6b4 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -218,6 +218,23 @@ - filename: lexi-llama-3-8b-uncensored.Q6_K.gguf sha256: 5805f3856cc18a769fae0b7c5659fe6778574691c370c910dad6eeec62c62436 uri: huggingface://NikolayKozloff/Lexi-Llama-3-8B-Uncensored-Q6_K-GGUF/lexi-llama-3-8b-uncensored.Q6_K.gguf +- <<: *llama3 + name: "llama-3-lewdplay-8b-evo" + urls: + - https://huggingface.co/Undi95/Llama-3-LewdPlay-8B-evo-GGUF + description: | + This is a merge of pre-trained language models created using mergekit. + + The new EVOLVE merge method was used (on MMLU specifically), see below for more information! + + Unholy was used for uncensoring, Roleplay Llama 3 for the DPO train he got on top, and LewdPlay for the... lewd side. 
+ overrides: + parameters: + model: Llama-3-LewdPlay-8B-evo.q8_0.gguf + files: + - filename: Llama-3-LewdPlay-8B-evo.q8_0.gguf + sha256: 1498152d598ff441f73ec6af9d3535875302e7251042d87feb7e71a3618966e8 + uri: huggingface://Undi95/Llama-3-LewdPlay-8B-evo-GGUF/Llama-3-LewdPlay-8B-evo.q8_0.gguf - <<: *llama3 name: "chaos-rp_l3_b-iq-imatrix" urls: From a8089494fdcaa4cfbc1997f3d32cd225ed395de1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Apr 2024 12:04:04 +0200 Subject: [PATCH 0401/2895] models(gallery): add biomistral-7b (#2161) * models(gallery): add biomistral-7b Signed-off-by: Ettore Di Giacinto * add <|end_of_text|> to llama3 as stopword Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ gallery/llama3-instruct.yaml | 1 + 2 files changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index d4e8d6b4..012a1ecb 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -698,6 +698,20 @@ - filename: "Hermes-2-Pro-Mistral-7B.Q8_0.gguf" sha256: "b6d95d7ec9a395b7568cc94b0447fd4f90b6f69d6e44794b1fbb84e3f732baca" uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q8_0.gguf" +- <<: *hermes-2-pro-mistral + name: "biomistral-7b" + description: | + BioMistral: A Collection of Open-Source Pretrained Large Language Models for Medical Domains + urls: + - https://huggingface.co/MaziyarPanahi/BioMistral-7B-GGUF + icon: https://huggingface.co/BioMistral/BioMistral-7B/resolve/main/wordart_blue_m_rectangle.png?download=true + overrides: + parameters: + model: BioMistral-7B.Q4_K_M.gguf + files: + - filename: "BioMistral-7B.Q4_K_M.gguf" + sha256: "3a73107045dfe7e3f113b392b0a67e3e6ca9fa9dae2abe301424ce5abd1721a6" + uri: "huggingface://MaziyarPanahi/BioMistral-7B-GGUF/BioMistral-7B.Q4_K_M.gguf" ### END Hermes-2-Pro-Mistral ### START Cerbero - url: "github:mudler/LocalAI/gallery/cerbero.yaml@master" diff --git a/gallery/llama3-instruct.yaml b/gallery/llama3-instruct.yaml index f6016cbd..3eed758b 100644 --- a/gallery/llama3-instruct.yaml +++ b/gallery/llama3-instruct.yaml @@ -41,3 +41,4 @@ config_file: | - <|im_end|> - - "<|eot_id|>" + - <|end_of_text|> From 3179c019af17a7fdede8089eaa410359ca151d74 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Apr 2024 12:32:49 +0200 Subject: [PATCH 0402/2895] Revert ":arrow_up: Update docs version mudler/LocalAI" (#2165) * Revert ":arrow_up: Update docs version mudler/LocalAI (#2149)" This reverts commit 56d843c263ce72409baefe2201130737c052239d. * Apply suggestions from code review Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index ad22e2aa..ce7f5d53 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": " 🖼️ v2.13.0 - Model gallery edition" + "version": "v2.13.0" } From 1a0a6f60a7dfd9522ca680048b62180e6b238bf0 Mon Sep 17 00:00:00 2001 From: QuinnPiers <167640194+QuinnPiers@users.noreply.github.com> Date: Sun, 28 Apr 2024 03:34:15 -0700 Subject: [PATCH 0403/2895] docs: update model-gallery.md with correct gallery file (#2163) * Update model-gallery.md with correct gallery file The readme points to a file that hasn't been updated in months so when there are announcements about new models, user's won't get them pointing to the old file. Point to the updated files instead. 
Signed-off-by: QuinnPiers <167640194+QuinnPiers@users.noreply.github.com> * Update model-gallery.md second pass with more understanding Signed-off-by: QuinnPiers <167640194+QuinnPiers@users.noreply.github.com> * Update model-gallery.md Signed-off-by: QuinnPiers <167640194+QuinnPiers@users.noreply.github.com> * Update model-gallery.md Signed-off-by: QuinnPiers <167640194+QuinnPiers@users.noreply.github.com> --------- Signed-off-by: QuinnPiers <167640194+QuinnPiers@users.noreply.github.com> --- docs/content/docs/features/model-gallery.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/content/docs/features/model-gallery.md b/docs/content/docs/features/model-gallery.md index 05d15ef4..137caee1 100644 --- a/docs/content/docs/features/model-gallery.md +++ b/docs/content/docs/features/model-gallery.md @@ -42,13 +42,13 @@ To enable the `model-gallery` repository you need to start `local-ai` with the ` GALLERIES=[{"name":"", "url":"where url is `github:mudler/localai/gallery/index.yaml` and name is localai +
will format the values into a valid github url `https://raw.githubusercontent.com/mudler/LocalAI/master/gallery/index.yaml` {{% alert note %}} @@ -60,7 +60,7 @@ As this feature is experimental, you need to run `local-ai` with a list of `GALL To enable the two repositories, start `LocalAI` with the `GALLERIES` environment variable: ```bash -GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}] +GALLERIES=[{"name":"gallery", "url":"github:mudler/localai/gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}] ``` If running with `docker-compose`, simply edit the `.env` file and uncomment the `GALLERIES` variable, and add the one you want to use. From 5e243ceaebf523e40968691c67851a214e9397cc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Apr 2024 15:54:15 +0200 Subject: [PATCH 0404/2895] docs: update gallery, add rerankers (#2166) Signed-off-by: Ettore Di Giacinto --- README.md | 3 +- docs/content/docs/advanced/_index.en.md | 2 +- docs/content/docs/features/model-gallery.md | 160 +++++++------------- docs/content/docs/features/reranker.md | 57 +++++++ docs/content/docs/features/text-to-audio.md | 4 + docs/content/docs/integrations.md | 2 +- docs/content/docs/overview.md | 3 +- docs/content/docs/reference/_index.en.md | 2 +- 8 files changed, 123 insertions(+), 110 deletions(-) create mode 100644 docs/content/docs/features/reranker.md diff --git a/README.md b/README.md index 7fe1a598..27b871ab 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,8 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu - 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/) - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/) - 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/) -- 🆕 [Vision API](https://localai.io/features/gpt-vision/) +- 🥽 [Vision API](https://localai.io/features/gpt-vision/) +- 🆕 [Reranker API](https://localai.io/features/reranker/) ## 💻 Usage diff --git a/docs/content/docs/advanced/_index.en.md b/docs/content/docs/advanced/_index.en.md index fade370b..bee814b4 100644 --- a/docs/content/docs/advanced/_index.en.md +++ b/docs/content/docs/advanced/_index.en.md @@ -2,7 +2,7 @@ weight: 20 title: "Advanced" description: "Advanced usage" -icon: science +icon: settings lead: "" date: 2020-10-06T08:49:15+00:00 lastmod: 2020-10-06T08:49:15+00:00 diff --git a/docs/content/docs/features/model-gallery.md b/docs/content/docs/features/model-gallery.md index 137caee1..eca9519b 100644 --- a/docs/content/docs/features/model-gallery.md +++ b/docs/content/docs/features/model-gallery.md @@ -7,15 +7,10 @@ weight = 18 url = '/models' +++ -


+The model gallery is a curated collection of models configurations for [LocalAI](https://github.com/go-skynet/LocalAI) that enables one-click install of models directly from the LocalAI Web interface. -The model gallery is a (experimental!) collection of models configurations for [LocalAI](https://github.com/go-skynet/LocalAI). +LocalAI to ease out installations of models provide a way to preload models on start and downloading and installing them in runtime. You can install models manually by copying them over the `models` directory, or use the API or the Web interface to configure, download and verify the model assets for you. -LocalAI to ease out installations of models provide a way to preload models on start and downloading and installing them in runtime. You can install models manually by copying them over the `models` directory, or use the API to configure, download and verify the model assets for you. As the UI is still a work in progress, you will find here the documentation about the API Endpoints. {{% alert note %}} The models in this gallery are not directly maintained by LocalAI. If you find a model that is not working, please open an issue on the model gallery repository. @@ -25,58 +20,55 @@ The models in this gallery are not directly maintained by LocalAI. If you find a GPT and text generation models might have a license which is not permissive for commercial use or might be questionable or without any license at all. Please check the model license before using it. The official gallery contains only open licensed models. {{% /alert %}} +![output](https://github.com/mudler/LocalAI/assets/2420543/7b16676e-d5b1-4c97-89bd-9fa5065c21ad) + ## Useful Links and resources - [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) - here you can find a list of the most performing models on the Open LLM benchmark. Keep in mind models compatible with LocalAI must be quantized in the `gguf` format. +## How it works -## Model repositories +Navigate the WebUI interface in the "Models" section from the navbar at the top. Here you can find a list of models that can be installed, and you can install them by clicking the "Install" button. + +## Add other galleries + +You can add other galleries by setting the `GALLERIES` environment variable. The `GALLERIES` environment variable is a list of JSON objects, where each object has a `name` and a `url` field. The `name` field is the name of the gallery, and the `url` field is the URL of the gallery's index file, for example: + +```json +GALLERIES=[{"name":"", "url":"", "url":"where url is `github:mudler/localai/gallery/index.yaml` and name is localai -
will format the values into a valid github url `https://raw.githubusercontent.com/mudler/LocalAI/master/gallery/index.yaml` + +where `github:mudler/localai/gallery/index.yaml` will be expanded automatically to `https://raw.githubusercontent.com/mudler/LocalAI/main/index.yaml`. + +Note: the url are expanded automatically for `github` and `huggingface`, however `https://` and `http://` prefix works as well. {{% alert note %}} -As this feature is experimental, you need to run `local-ai` with a list of `GALLERIES`. Currently there are two galleries: - -- An official one, containing only definitions and models with a clear LICENSE to avoid any dmca infringment. As I'm not sure what's the best action to do in this case, I'm not going to include any model that is not clearly licensed in this repository which is offically linked to LocalAI. -- A "community" one that contains an index of `huggingface` models that are compatible with the `ggml` format and lives in the `localai-huggingface-zoo` repository. - -To enable the two repositories, start `LocalAI` with the `GALLERIES` environment variable: - -```bash -GALLERIES=[{"name":"gallery", "url":"github:mudler/localai/gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}] -``` - -If running with `docker-compose`, simply edit the `.env` file and uncomment the `GALLERIES` variable, and add the one you want to use. - -{{% /alert %}} - -{{% alert note %}} -You might not find all the models in this gallery. Automated CI updates the gallery automatically. You can find however most of the models on huggingface (https://huggingface.co/), generally it should be available `~24h` after upload. - -By under any circumstances LocalAI and any developer is not responsible for the models in this gallery, as CI is just indexing them and providing a convenient way to install with an automatic configuration with a consistent API. Don't install models from authors you don't trust, and, check the appropriate license for your use case. Models are automatically indexed and hosted on huggingface (https://huggingface.co/). For any issue with the models, please open an issue on the model gallery repository if it's a LocalAI misconfiguration, otherwise refer to the huggingface repository. If you think a model should not be listed, please reach to us and we will remove it from the gallery. -{{% /alert %}} - -{{% alert note %}} - -There is no documentation yet on how to build a gallery or a repository - but you can find an example in the [model-gallery](https://github.com/go-skynet/model-gallery) repository. - +If you want to build your own gallery, there is no documentation yet. However you can find the source of the default gallery in the [LocalAI repository](https://github.com/mudler/LocalAI/tree/master/gallery). {{% /alert %}} @@ -110,34 +102,16 @@ To install a model from the gallery repository, you can pass the model name in t ```bash LOCALAI=http://localhost:8080 curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "id": "model-gallery@bert-embeddings" + "id": "localai@bert-embeddings" }' ``` where: -- `model-gallery` is the repository. It is optional and can be omitted. If the repository is omitted LocalAI will search the model by name in all the repositories. In the case the same model name is present in both galleries the first match wins. +- `localai` is the repository. It is optional and can be omitted. If the repository is omitted LocalAI will search the model by name in all the repositories. 
In the case the same model name is present in both galleries the first match wins. - `bert-embeddings` is the model name in the gallery - (read its [config here](https://github.com/go-skynet/model-gallery/blob/main/bert-embeddings.yaml)). + (read its [config here](https://github.com/mudler/LocalAI/tree/master/gallery/blob/main/bert-embeddings.yaml)). -{{% alert note %}} -If the `huggingface` model gallery is enabled (it's enabled by default), -and the model has an entry in the model gallery's associated YAML config -(for `huggingface`, see [`model-gallery/huggingface.yaml`](https://github.com/go-skynet/model-gallery/blob/main/huggingface.yaml)), -you can install models by specifying directly the model's `id`. -For example, to install wizardlm superhot: - -```bash -LOCALAI=http://localhost:8080 -curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "id": "huggingface@TheBloke/WizardLM-13B-V1-0-Uncensored-SuperHOT-8K-GGML/wizardlm-13b-v1.0-superhot-8k.ggmlv3.q4_K_M.bin" - }' -``` - -Note that the `id` can be used similarly when pre-loading models at start. -{{% /alert %}} - - -## How to install a model (without a gallery) +### How to install a model not part of a gallery If you don't want to set any gallery repository, you can still install models by loading a model configuration file. @@ -201,13 +175,13 @@ Note: `url` or `id` must be specified. `url` is used to a url to a model gallery For example: ```bash -PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/stablediffusion.yaml"}] +PRELOAD_MODELS=[{"url": "github:mudler/LocalAI/gallery/stablediffusion.yaml@master"}] ``` or as arg: ```bash -local-ai --preload-models '[{"url": "github:go-skynet/model-gallery/stablediffusion.yaml"}]' +local-ai --preload-models '[{"url": "github:mudler/LocalAI/gallery/stablediffusion.yaml@master"}]' ``` or in a YAML file: @@ -218,14 +192,14 @@ local-ai --preload-models-config "/path/to/yaml" YAML: ```yaml -- url: github:go-skynet/model-gallery/stablediffusion.yaml +- url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master ```
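A preload file may also hold several entries at once, and each entry accepts the same `name` override as the `/models/apply` request; a minimal sketch combining examples that appear elsewhere on this page:

```yaml
# hypothetical multi-entry file passed via --preload-models-config
- url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master
- url: github:mudler/LocalAI/gallery/whisper-base.yaml@master
  name: whisper-1 # install under a different model name
```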
{{% alert note %}} -You can find already some open licensed models in the [model gallery](https://github.com/go-skynet/model-gallery). +You can find already some open licensed models in the [LocalAI gallery](https://github.com/mudler/LocalAI/tree/master/gallery). If you don't find the model in the gallery you can try to use the "base" model and provide an URL to LocalAI: @@ -233,7 +207,7 @@ If you don't find the model in the gallery you can try to use the "base" model a ``` curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "url": "github:go-skynet/model-gallery/base.yaml", + "url": "github:mudler/LocalAI/gallery/base.yaml@master", "name": "model-name", "files": [ { @@ -249,7 +223,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ {{% /alert %}} -## Installing a model with a different name +### Override a model name To install a model with a different name, specify a `name` parameter in the request body. @@ -266,11 +240,11 @@ For example, to install a model as `gpt-3.5-turbo`: ```bash LOCALAI=http://localhost:8080 curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "url": "github:go-skynet/model-gallery/gpt4all-j.yaml", + "url": "github:mudler/LocalAI/gallery/gpt4all-j.yaml", "name": "gpt-3.5-turbo" }' ``` -## Additional Files +### Additional Files
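The body of this section is elided by the hunks here; presumably the `files` array shown truncated in the base.yaml example above follows the same `filename`/`sha256`/`uri` triplet used by every gallery entry in this series. A hedged sketch with placeholder values:

```bash
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
     "url": "github:mudler/LocalAI/gallery/base.yaml@master",
     "name": "model-name",
     "files": [
        {
           "filename": "<local file name>",
           "sha256": "<file checksum>",
           "uri": "<download URL>"
        }
     ]
   }'
```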
@@ -293,7 +267,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
-## Overriding configuration files +### Overriding configuration files
@@ -324,7 +298,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ ```bash curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "url": "github:go-skynet/model-gallery/bert-embeddings.yaml", + "url": "github:mudler/LocalAI/gallery/bert-embeddings.yaml", "name": "text-embedding-ada-002" }' ``` @@ -348,10 +322,10 @@ URL: https://github.com/EdVince/Stable-Diffusion-NCNN {{< tabs >}} {{% tab name="Prepare the model in runtime" %}} -While the API is running, you can install the model by using the `/models/apply` endpoint and point it to the `stablediffusion` model in the [models-gallery](https://github.com/go-skynet/model-gallery#image-generation-stable-diffusion): +While the API is running, you can install the model by using the `/models/apply` endpoint and point it to the `stablediffusion` model in the [models-gallery](https://github.com/mudler/LocalAI/tree/master/gallery#image-generation-stable-diffusion): ```bash curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "url": "github:go-skynet/model-gallery/stablediffusion.yaml" + "url": "github:mudler/LocalAI/gallery/stablediffusion.yaml@master" }' ``` @@ -361,13 +335,13 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ You can set the `PRELOAD_MODELS` environment variable: ```bash -PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/stablediffusion.yaml"}] +PRELOAD_MODELS=[{"url": "github:mudler/LocalAI/gallery/stablediffusion.yaml@master"}] ``` or as arg: ```bash -local-ai --preload-models '[{"url": "github:go-skynet/model-gallery/stablediffusion.yaml"}]' +local-ai --preload-models '[{"url": "github:mudler/LocalAI/gallery/stablediffusion.yaml@master"}]' ``` or in a YAML file: @@ -378,7 +352,7 @@ local-ai --preload-models-config "/path/to/yaml" YAML: ```yaml -- url: github:go-skynet/model-gallery/stablediffusion.yaml +- url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master ``` {{% /tab %}} @@ -403,7 +377,7 @@ URL: https://github.com/ggerganov/whisper.cpp ```bash curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "url": "github:go-skynet/model-gallery/whisper-base.yaml", + "url": "github:mudler/LocalAI/gallery/whisper-base.yaml@master", "name": "whisper-1" }' ``` @@ -414,13 +388,13 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ You can set the `PRELOAD_MODELS` environment variable: ```bash -PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/whisper-base.yaml", "name": "whisper-1"}] +PRELOAD_MODELS=[{"url": "github:mudler/LocalAI/gallery/whisper-base.yaml@master", "name": "whisper-1"}] ``` or as arg: ```bash -local-ai --preload-models '[{"url": "github:go-skynet/model-gallery/whisper-base.yaml", "name": "whisper-1"}]' +local-ai --preload-models '[{"url": "github:mudler/LocalAI/gallery/whisper-base.yaml@master", "name": "whisper-1"}]' ``` or in a YAML file: @@ -431,37 +405,13 @@ local-ai --preload-models-config "/path/to/yaml" YAML: ```yaml -- url: github:go-skynet/model-gallery/whisper-base.yaml +- url: github:mudler/LocalAI/gallery/whisper-base.yaml@master name: whisper-1 ``` {{% /tab %}} {{< /tabs >}} -### GPTs - -
- -```bash -LOCALAI=http://localhost:8080 -curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "url": "github:go-skynet/model-gallery/gpt4all-j.yaml", - "name": "gpt4all-j" - }' -``` - -To test it: - -``` -curl $LOCALAI/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "gpt4all-j", - "messages": [{"role": "user", "content": "How are you?"}], - "temperature": 0.1 - }' -``` - -
- ### Note LocalAI will create a batch process that downloads the required files from a model definition and automatically reload itself to include the new model. @@ -495,7 +445,7 @@ Returns an `uuid` and an `url` to follow up the state of the process: { "uuid":"251475c9-f666-11ed-95e0-9a8a4480ac58", "status":"http://localhost:8080/models/jobs/251475c9-f666-11ed-95e0-9a8a4480ac58"} ``` -To see a collection example of curated models definition files, see the [model-gallery](https://github.com/go-skynet/model-gallery). +To see a collection example of curated models definition files, see the [LocalAI repository](https://github.com/mudler/LocalAI/tree/master/gallery). #### Get model job state `/models/jobs/` diff --git a/docs/content/docs/features/reranker.md b/docs/content/docs/features/reranker.md new file mode 100644 index 00000000..92c406df --- /dev/null +++ b/docs/content/docs/features/reranker.md @@ -0,0 +1,57 @@ + ++++ +disableToc = false +title = " Reranker" +weight = 11 +url = "/features/reranker/" ++++ + +A **reranking** model, often referred to as a cross-encoder, is a core component in the two-stage retrieval systems used in information retrieval and natural language processing tasks. +Given a query and a set of documents, it will output similarity scores. + +We can use then the score to reorder the documents by relevance in our RAG system to increase its overall accuracy and filter out non-relevant results. + +![output](https://github.com/mudler/LocalAI/assets/2420543/ede67b25-fac4-4833-ae4f-78290e401e60) + +LocalAI supports reranker models, and you can use them by using the `rerankers` backend, which uses [rerankers](https://github.com/AnswerDotAI/rerankers). + +## Usage + +You can test `rerankers` by using container images with python (this does **NOT** work with `core` images) and a model config file like this, or by installing `cross-encoder` from the gallery in the UI: + +```yaml +name: jina-reranker-v1-base-en +backend: rerankers +parameters: + model: cross-encoder + +# optionally: +# type: flashrank +# diffusers: +# pipeline_type: en # to specify the english language +``` + +and test it with: + +```bash + + curl http://localhost:8080/v1/rerank \ + -H "Content-Type: application/json" \ + -d '{ + "model": "jina-reranker-v1-base-en", + "query": "Organic skincare products for sensitive skin", + "documents": [ + "Eco-friendly kitchenware for modern homes", + "Biodegradable cleaning supplies for eco-conscious consumers", + "Organic cotton baby clothes for sensitive skin", + "Natural organic skincare range for sensitive skin", + "Tech gadgets for smart homes: 2024 edition", + "Sustainable gardening tools and compost solutions", + "Sensitive skin-friendly facial cleansers and toners", + "Organic food wraps and storage solutions", + "All-natural pet food for dogs with allergies", + "Yoga mats made from recycled materials" + ], + "top_n": 3 + }' +``` diff --git a/docs/content/docs/features/text-to-audio.md b/docs/content/docs/features/text-to-audio.md index f53407a1..ebfdda1d 100644 --- a/docs/content/docs/features/text-to-audio.md +++ b/docs/content/docs/features/text-to-audio.md @@ -163,3 +163,7 @@ curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ "input":"Hello!" }' | aplay ``` + +## Parler-tts + +`parler-tts`. It is possible to install and configure the model directly from the gallery. 
https://github.com/huggingface/parler-tts \ No newline at end of file diff --git a/docs/content/docs/integrations.md b/docs/content/docs/integrations.md index a7666e77..50f683c3 100644 --- a/docs/content/docs/integrations.md +++ b/docs/content/docs/integrations.md @@ -2,7 +2,7 @@ disableToc = false title = "Integrations" weight = 19 -icon = "rocket_launch" +icon = "sync" +++ diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md index f0f59494..15086f6f 100644 --- a/docs/content/docs/overview.md +++ b/docs/content/docs/overview.md @@ -99,8 +99,9 @@ Note that this started just as a fun weekend project by [mudler](https://github. - 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/) - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/) - 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/) -- 🆕 [Vision API](https://localai.io/features/gpt-vision/) +- 🥽 [Vision API](https://localai.io/features/gpt-vision/) - 💾 [Stores](https://localai.io/stores) +- 🆕 [Reranker](https://localai.io/features/reranker/) ## Contribute and help diff --git a/docs/content/docs/reference/_index.en.md b/docs/content/docs/reference/_index.en.md index 339d2728..d8a8f2a7 100644 --- a/docs/content/docs/reference/_index.en.md +++ b/docs/content/docs/reference/_index.en.md @@ -2,7 +2,7 @@ weight: 23 title: "References" description: "Reference" -icon: science +icon: menu_book lead: "" date: 2020-10-06T08:49:15+00:00 lastmod: 2020-10-06T08:49:15+00:00 From a78cd677375a936d4f7ebc8212aeac847403abe1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Apr 2024 19:30:23 +0200 Subject: [PATCH 0405/2895] Update quickstart.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/getting-started/quickstart.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md index ab45e5aa..9fe57cef 100644 --- a/docs/content/docs/getting-started/quickstart.md +++ b/docs/content/docs/getting-started/quickstart.md @@ -30,7 +30,7 @@ Before you begin, ensure you have a container engine installed if you are not us > _Do you have already a model file? Skip to [Run models manually]({{%relref "docs/getting-started/manual" %}}) or [Run other models]({{%relref "docs/getting-started/run-other-models" %}}) to use an already-configured model_. -LocalAI's All-in-One (AIO) images are pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. +LocalAI's All-in-One (AIO) images are pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. If you don't need models pre-configured, you can use the standard [images]({{%relref "docs/reference/container-images" %}}). These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and requires no configuration. 
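The AIO images referenced by the quickstart hunk above can be started directly; a minimal sketch (the CPU command is the one documented elsewhere in this series, and the GPU variant assumes the NVIDIA container toolkit is installed for `--gpus all`):

```bash
# CPU all-in-one image
docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu

# Nvidia CUDA 12 all-in-one image (assumes the NVIDIA container toolkit)
docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-12
```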
From 26e18925215ed5bd8caf1cda2270053e10c9d9ec Mon Sep 17 00:00:00 2001 From: Sijia Lu <46901221+LeonSijiaLu@users.noreply.github.com> Date: Sun, 28 Apr 2024 13:38:02 -0400 Subject: [PATCH 0406/2895] Issue-1720: Updated `Build on mac` documentations (#2171) updated build on macs documentation Signed-off-by: LeonSijiaLu --- docs/content/docs/getting-started/build.md | 24 +++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/docs/content/docs/getting-started/build.md b/docs/content/docs/getting-started/build.md index a4db135e..7e585ab3 100644 --- a/docs/content/docs/getting-started/build.md +++ b/docs/content/docs/getting-started/build.md @@ -154,11 +154,11 @@ cd LocalAI # build the binary make build -# Download gpt4all-j to models/ -wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j +# Download phi-2 to models/ +wget https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q2_K.gguf -O models/phi-2.Q2_K # Use a template from the examples -cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/ +cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/phi-2.Q2_K.tmpl # Run LocalAI ./local-ai --models-path=./models/ --debug=true @@ -167,7 +167,7 @@ cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/ curl http://localhost:8080/v1/models curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "ggml-gpt4all-j", + "model": "phi-2.Q2_K", "messages": [{"role": "user", "content": "How are you?"}], "temperature": 0.9 }' @@ -175,9 +175,19 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso #### Troublshooting mac -If you encounter errors regarding a missing utility metal, install `Xcode` from the App Store. -If completions are slow, ensure that `gpu-layers` in your model yaml matches the number of layers from the model in use (or simply use a high number such as 256). -If you a get a compile error: `error: only virtual member functions can be marked 'final'`, reinstall all the necessary brew packages, clean the build, and try again. +1. If you encounter errors regarding a missing utility metal, install `Xcode` from the App Store. +2. After the installation of Xcode, if you receive a xcrun error `'xcrun: error: unable to find utility "metal", not a developer tool or in PATH'`. You might have installed the Xcode command line tools before installing Xcode, the former one is pointing to an incomplete SDK. + +``` +# print /Library/Developer/CommandLineTools, if command line tools were installed in advance +xcode-select --print-path + +# point to a complete SDK +sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer +``` + +3. If completions are slow, ensure that `gpu-layers` in your model yaml matches the number of layers from the model in use (or simply use a high number such as 256). +4. If you a get a compile error: `error: only virtual member functions can be marked 'final'`, reinstall all the necessary brew packages, clean the build, and try again. 
``` # reinstall build dependencies From 21974fe1d34a760bd94b53a9b10a6c784452df3c Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Sun, 28 Apr 2024 12:51:53 -0500 Subject: [PATCH 0407/2895] fix: swap to WHISPER_CUDA per deprecation message from whisper.cpp (#2170) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6ef6e9ab..be1c6d2e 100644 --- a/Makefile +++ b/Makefile @@ -99,7 +99,7 @@ endif ifeq ($(BUILD_TYPE),cublas) CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) export LLAMA_CUBLAS=1 - export WHISPER_CUBLAS=1 + export WHISPER_CUDA=1 CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda endif From 987b7ad42d3102f535e8ed8ebd84fc303b66c519 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Sun, 28 Apr 2024 14:24:16 -0500 Subject: [PATCH 0408/2895] feat: only keep the build artifacts from the grpc build (#2172) * feat: only keep the build artifacts from the grpc build Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: remove separate Cache GRPC build step Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: remove docker inspect step, it is leftover from previous debugging Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --------- Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- .github/workflows/image_build.yml | 28 ---------------------------- Dockerfile | 12 ++++++------ 2 files changed, 6 insertions(+), 34 deletions(-) diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index fb1985fd..7d60d23a 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -201,26 +201,6 @@ jobs: username: ${{ secrets.quayUsername }} password: ${{ secrets.quayPassword }} - - name: Cache GRPC - uses: docker/build-push-action@v5 - with: - builder: ${{ steps.buildx.outputs.name }} - # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache. - # This means that even the MAKEFLAGS have to be an EXACT match. - # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch. - build-args: | - GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} - GRPC_MAKEFLAGS=--jobs=4 --output-sync=target - GRPC_VERSION=v1.58.0 - context: . 
- file: ./Dockerfile - cache-from: type=gha - target: grpc - platforms: ${{ inputs.platforms }} - push: false - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - - name: Build and push uses: docker/build-push-action@v5 with: @@ -248,14 +228,6 @@ jobs: tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - - name: Inspect image - if: github.event_name != 'pull_request' - run: | - docker pull localai/localai:${{ steps.meta.outputs.version }} - docker image inspect localai/localai:${{ steps.meta.outputs.version }} - docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} - docker image inspect quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} - - name: Build and push AIO image if: inputs.aio != '' uses: docker/build-push-action@v5 diff --git a/Dockerfile b/Dockerfile index 717b3a3a..1c4e24fc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -148,16 +148,16 @@ RUN apt-get update && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc - -WORKDIR /build/grpc/cmake/build - # We install GRPC to a different prefix here so that we can copy in only the build artifacts later # saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree # and running make install in the target container -RUN cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \ +RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ + mkdir -p /build/grpc/cmake/build && \ + cd /build/grpc/cmake/build && \ + cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. 
&& \ make && \ - make install + make install && \ + rm -rf /build ################################### ################################### From 01860674c4d95a1bacb2a22fb8c30d086ee25ba8 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 28 Apr 2024 23:41:12 +0200 Subject: [PATCH 0409/2895] :arrow_up: Update ggerganov/llama.cpp (#2176) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index be1c6d2e..5980fc3c 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=4dba7e8114d84241c842b986e008af8b88d1a019 +CPPLLAMA_VERSION?=7bb36ccf91b8a2e92b182dd75624f1fd7cb205ac # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From a24cd4fda0bdd3f7d15da05e8da9131930adc9b7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Apr 2024 23:41:59 +0200 Subject: [PATCH 0410/2895] docs: enhance and condense few sections (#2178) Signed-off-by: Ettore Di Giacinto --- .../container-images.md | 66 +++++++++++++++++-- .../docs/getting-started/kubernetes.md | 30 +++++++++ docs/content/docs/getting-started/manual.md | 17 +---- .../docs/getting-started/quickstart.md | 37 +++++++++-- docs/content/docs/reference/aio-images.md | 53 --------------- 5 files changed, 124 insertions(+), 79 deletions(-) rename docs/content/docs/{reference => getting-started}/container-images.md (65%) create mode 100644 docs/content/docs/getting-started/kubernetes.md delete mode 100644 docs/content/docs/reference/aio-images.md diff --git a/docs/content/docs/reference/container-images.md b/docs/content/docs/getting-started/container-images.md similarity index 65% rename from docs/content/docs/reference/container-images.md rename to docs/content/docs/getting-started/container-images.md index 6531dd97..aaeb43ec 100644 --- a/docs/content/docs/reference/container-images.md +++ b/docs/content/docs/getting-started/container-images.md @@ -1,13 +1,14 @@ - +++ disableToc = false -title = "Available Container images" -weight = 25 +title = "Run with container images" +weight = 6 +url = '/basics/container/' +ico = "rocket_launch" +++ LocalAI provides a variety of images to support different environments. These images are available on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) and [Docker Hub](https://hub.docker.com/r/localai/localai). -> _For All-in-One image with a pre-configured set of models and backends, see the [AIO Images]({{%relref "docs/reference/aio-images" %}})._ +All-in-One images comes with a pre-configured set of models and backends, standard images instead do not have any model pre-configured and installed. For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA images, if you don't have a GPU, use the CPU images. If you have AMD or Mac Silicon, see the [build section]({{%relref "docs/getting-started/build" %}}). @@ -22,6 +23,62 @@ For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA {{% /alert %}} +## All-in-one images + +All-In-One images are images that come pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. These images are available for both CPU and GPU environments. 
The AIO images are designed to be easy to use and requires no configuration. Models configuration can be found [here](https://github.com/mudler/LocalAI/tree/master/aio) separated by size. + +In the AIO images there are models configured with the names of OpenAI models, however, they are really backed by Open Source models. You can find the table below + +| Category | Model name | Real model (CPU) | Real model (GPU) | +| ---- | ---- | ---- | ---- | +| Text Generation | `gpt-4` | `phi-2` | `hermes-2-pro-mistral` | +| Multimodal Vision | `gpt-4-vision-preview` | `bakllava` | `llava-1.6-mistral` | +| Image Generation | `stablediffusion` | `stablediffusion` | `dreamshaper-8` | +| Speech to Text | `whisper-1` | `whisper` with `whisper-base` model | <= same | +| Text to Speech | `tts-1` | `en-us-amy-low.onnx` from `rhasspy/piper` | <= same | +| Embeddings | `text-embedding-ada-002` | `all-MiniLM-L6-v2` in Q4 | `all-MiniLM-L6-v2` | + +### Usage + +Select the image (CPU or GPU) and start the container with Docker: + +```bash +# CPU example +docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu +``` + +LocalAI will automatically download all the required models, and the API will be available at [localhost:8080](http://localhost:8080/v1/models). + +### Available images + +| Description | Quay | Docker Hub | +| --- | --- |-----------------------------------------------| +| Latest images for CPU | `quay.io/go-skynet/local-ai:latest-aio-cpu` | `localai/localai:latest-aio-cpu` | +| Versioned image (e.g. for CPU) | `quay.io/go-skynet/local-ai:{{< version >}}-aio-cpu` | `localai/localai:{{< version >}}-aio-cpu` | +| Latest images for Nvidia GPU (CUDA11) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-11` | `localai/localai:latest-aio-gpu-nvidia-cuda-11` | +| Latest images for Nvidia GPU (CUDA12) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12` | `localai/localai:latest-aio-gpu-nvidia-cuda-12` | +| Latest images for AMD GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-hipblas` | `localai/localai:latest-aio-gpu-hipblas` | +| Latest images for Intel GPU (sycl f16) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f16` | `localai/localai:latest-aio-gpu-intel-f16` | +| Latest images for Intel GPU (sycl f32) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f32` | `localai/localai:latest-aio-gpu-intel-f32` | + +### Available environment variables + +The AIO Images are inheriting the same environment variables as the base images and the environment of LocalAI (that you can inspect by calling `--help`). However, it supports additional environment variables available only from the container image + +| Variable | Default | Description | +| ---------------------| ------- | ----------- | +| `PROFILE` | Auto-detected | The size of the model to use. Available: `cpu`, `gpu-8g` | +| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/getting-started/run-other-models" %}})) | + + +## Standard container images + +Standard container images do not have pre-installed models. + +Images are available with and without python dependencies. Note that images with python dependencies are bigger (in order of 17GB). + +Images with `core` in the tag are smaller and do not contain any python dependencies. 
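As a usage sketch tying together the AIO environment variables documented in the table above (assumptions: `PROFILE=cpu` forces the CPU-sized model set per the table, and the `MODELS` config URL is a placeholder):

```bash
docker run -p 8080:8080 --name local-ai -ti \
  -e PROFILE=cpu \
  -e MODELS="https://example.com/extra-model.yaml" \
  localai/localai:latest-aio-cpu
```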
+ {{< tabs tabTotal="6" >}} {{% tab tabName="Vanilla / CPU Images" %}} @@ -100,4 +157,3 @@ For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA ## See Also - [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}) -- [AIO Images]({{%relref "docs/reference/aio-images" %}}) \ No newline at end of file diff --git a/docs/content/docs/getting-started/kubernetes.md b/docs/content/docs/getting-started/kubernetes.md new file mode 100644 index 00000000..17971b37 --- /dev/null +++ b/docs/content/docs/getting-started/kubernetes.md @@ -0,0 +1,30 @@ ++++ +disableToc = false +title = "Run with Kubernetes" +weight = 6 +url = '/basics/kubernetes/' +ico = "rocket_launch" ++++ + +For installing LocalAI in Kubernetes, you can use the `go-skynet` helm chart: + +```bash +# Install the helm repository +helm repo add go-skynet https://go-skynet.github.io/helm-charts/ +# Update the repositories +helm repo update +# Get the values +helm show values go-skynet/local-ai > values.yaml + +# Edit the values value if needed +# vim values.yaml ... + +# Install the helm chart +helm install local-ai go-skynet/local-ai -f values.yaml +``` + +If you prefer to install from manifest file, you can install from the deployment file, and customize as you like: + +``` +kubectl apply -f https://raw.githubusercontent.com/mudler/LocalAI/master/examples/kubernetes/deployment.yaml +``` \ No newline at end of file diff --git a/docs/content/docs/getting-started/manual.md b/docs/content/docs/getting-started/manual.md index c2da82f7..befc0244 100644 --- a/docs/content/docs/getting-started/manual.md +++ b/docs/content/docs/getting-started/manual.md @@ -131,22 +131,7 @@ Note: If you are on Windows, please make sure the project is on the Linux Filesy {{% tab tabName="Kubernetes" %}} -For installing LocalAI in Kubernetes, you can use the following helm chart: - -```bash -# Install the helm repository -helm repo add go-skynet https://go-skynet.github.io/helm-charts/ -# Update the repositories -helm repo update -# Get the values -helm show values go-skynet/local-ai > values.yaml - -# Edit the values value if needed -# vim values.yaml ... - -# Install the helm chart -helm install local-ai go-skynet/local-ai -f values.yaml -``` +See the [Kubernetes section]({{%relref "docs/getting-started/kubernetes" %}}). {{% /tab %}} {{% tab tabName="From binary" %}} diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md index 9fe57cef..d4d9d7da 100644 --- a/docs/content/docs/getting-started/quickstart.md +++ b/docs/content/docs/getting-started/quickstart.md @@ -30,7 +30,7 @@ Before you begin, ensure you have a container engine installed if you are not us > _Do you have already a model file? Skip to [Run models manually]({{%relref "docs/getting-started/manual" %}}) or [Run other models]({{%relref "docs/getting-started/run-other-models" %}}) to use an already-configured model_. -LocalAI's All-in-One (AIO) images are pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. If you don't need models pre-configured, you can use the standard [images]({{%relref "docs/reference/container-images" %}}). +LocalAI's All-in-One (AIO) images are pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. If you don't need models pre-configured, you can use the standard [images]({{%relref "docs/getting-started/container-images" %}}). These images are available for both CPU and GPU environments. 
The AIO images are designed to be easy to use and requires no configuration. @@ -91,7 +91,7 @@ services: # capabilities: [gpu] ``` -For a list of all the container-images available, see [Container images]({{%relref "docs/reference/container-images" %}}). To learn more about All-in-one images instead, see [All-in-one Images]({{%relref "docs/reference/aio-images" %}}). +For a list of all the container-images available, see [Container images]({{%relref "docs/getting-started/container-images" %}}). To learn more about All-in-one images instead, see [All-in-one Images]({{%relref "docs/getting-started/container-images" %}}). {{% alert icon="💡" %}} @@ -114,9 +114,36 @@ docker run -p 8080:8080 --name local-ai -ti -v localai-models:/build/models loca {{% /alert %}} +## From binary + +LocalAI is available as a standalone binary as well. Binaries are compiled for Linux and MacOS and automatically uploaded in the Github releases. Windows is known to work with WSL. + +You can check out the releases in https://github.com/mudler/LocalAI/releases. + +{{< tabs tabTotal="2" >}} +{{% tab tabName="Linux" %}} +| CPU flagset | Link | +| --- | --- | +| avx2 | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-avx2-Linux-x86_64) | +| avx512 | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-avx512-Linux-x86_64) | +| avx | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-avx-Linux-x86_64) | +{{% /tab %}} +{{% tab tabName="MacOS" %}} +| CPU flagset | Link | +| --- | --- | +| avx2 | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-avx2-Darwin-arm64) | +| avx512 | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-avx512-Darwin-arm64) | +| avx | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-avx-Darwin-arm64) | + +{{% /tab %}} + +{{< /tabs >}} + ## Try it out -LocalAI does not ship a webui by default, however you can use 3rd party projects to interact with it (see also [Integrations]({{%relref "docs/integrations" %}}) ). However, you can test out the API endpoints using `curl`, you can find few examples below. +Connect to LocalAI, by default the WebUI should be accessible from http://localhost:8080 . You can also use 3rd party projects to interact with LocalAI as you would use OpenAI (see also [Integrations]({{%relref "docs/integrations" %}}) ). + +You can also test out the API endpoints using `curl`, examples below. 
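For instance, a first smoke test against a running AIO image could look like this (a sketch; `gpt-4` is the OpenAI-style alias that the AIO images map to a local open model, per the table in this series):

```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
  "model": "gpt-4",
  "messages": [{"role": "user", "content": "How are you?"}],
  "temperature": 0.9
}'
```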
### Text Generation @@ -300,6 +327,6 @@ Explore further resources and community contributions: - [Build LocalAI and the container image]({{%relref "docs/getting-started/build" %}}) - [Run models manually]({{%relref "docs/getting-started/manual" %}}) - [Run other models]({{%relref "docs/getting-started/run-other-models" %}}) -- [Container images]({{%relref "docs/reference/container-images" %}}) -- [All-in-one Images]({{%relref "docs/reference/aio-images" %}}) +- [Container images]({{%relref "docs/getting-started/container-images" %}}) +- [All-in-one Images]({{%relref "docs/getting-started/container-images" %}}) - [Examples](https://github.com/mudler/LocalAI/tree/master/examples#examples) diff --git a/docs/content/docs/reference/aio-images.md b/docs/content/docs/reference/aio-images.md deleted file mode 100644 index b5253ee4..00000000 --- a/docs/content/docs/reference/aio-images.md +++ /dev/null @@ -1,53 +0,0 @@ - -+++ -disableToc = false -title = "All-In-One images" -weight = 26 -+++ - -All-In-One images are images that come pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and requires no configuration. Models configuration can be found [here](https://github.com/mudler/LocalAI/tree/master/aio) separated by size. - -In the AIO images there are models configured with the names of OpenAI models, however, they are really backed by Open Source models. You can find the table below - -| Category | Model name | Real model (CPU) | Real model (GPU) | -| ---- | ---- | ---- | ---- | -| Text Generation | `gpt-4` | `phi-2` | `hermes-2-pro-mistral` | -| Multimodal Vision | `gpt-4-vision-preview` | `bakllava` | `llava-1.6-mistral` | -| Image Generation | `stablediffusion` | `stablediffusion` | `dreamshaper-8` | -| Speech to Text | `whisper-1` | `whisper` with `whisper-base` model | <= same | -| Text to Speech | `tts-1` | `en-us-amy-low.onnx` from `rhasspy/piper` | <= same | -| Embeddings | `text-embedding-ada-002` | `all-MiniLM-L6-v2` in Q4 | `all-MiniLM-L6-v2` | - -## Usage - -Select the image (CPU or GPU) and start the container with Docker: - -```bash -# CPU example -docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu -``` - -LocalAI will automatically download all the required models, and the API will be available at [localhost:8080](http://localhost:8080/v1/models). - -## Available images - -| Description | Quay | Docker Hub | -| --- | --- |-----------------------------------------------| -| Latest images for CPU | `quay.io/go-skynet/local-ai:latest-aio-cpu` | `localai/localai:latest-aio-cpu` | -| Versioned image (e.g. 
for CPU) | `quay.io/go-skynet/local-ai:{{< version >}}-aio-cpu` | `localai/localai:{{< version >}}-aio-cpu` | -| Latest images for Nvidia GPU (CUDA11) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-11` | `localai/localai:latest-aio-gpu-nvidia-cuda-11` | -| Latest images for Nvidia GPU (CUDA12) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12` | `localai/localai:latest-aio-gpu-nvidia-cuda-12` | -| Latest images for AMD GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-hipblas` | `localai/localai:latest-aio-gpu-hipblas` | -| Latest images for Intel GPU (sycl f16) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f16` | `localai/localai:latest-aio-gpu-intel-f16` | -| Latest images for Intel GPU (sycl f32) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f32` | `localai/localai:latest-aio-gpu-intel-f32` | - -## Available environment variables - -The AIO Images are inheriting the same environment variables as the base images and the environment of LocalAI (that you can inspect by calling `--help`). However, it supports additional environment variables available only from the container image - -| Variable | Default | Description | -| ---------------------| ------- | ----------- | -| `PROFILE` | Auto-detected | The size of the model to use. Available: `cpu`, `gpu-8g` | -| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/getting-started/run-other-models" %}})) | - - From e8d44447ad49679cb877a4aa025e8d6e030e9d5b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Apr 2024 23:42:46 +0200 Subject: [PATCH 0411/2895] feat(gallery): support model deletion (#2173) * feat(gallery): op now supports deletion of models Signed-off-by: Ettore Di Giacinto * Wire things with WebUI(WIP) Signed-off-by: Ettore Di Giacinto * minor improvements Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- core/config/backend_config.go | 68 +++++++++++++++++++++----- core/http/elements/gallery.go | 43 ++++++++++++---- core/http/endpoints/localai/gallery.go | 21 ++++++++ core/http/routes/localai.go | 2 + core/http/routes/ui.go | 44 ++++++++++++++++- core/services/gallery.go | 57 ++++++++++++++++----- pkg/gallery/gallery.go | 46 +++++++++++++++++ pkg/gallery/gallery_suite_test.go | 7 +++ pkg/gallery/models.go | 17 ++++++- pkg/gallery/models_test.go | 15 ++++++ pkg/gallery/op.go | 2 + pkg/model/loader.go | 8 ++- 12 files changed, 294 insertions(+), 36 deletions(-) diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 35e0776d..0d7d0cbf 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -184,6 +184,36 @@ func (c *BackendConfig) ShouldCallSpecificFunction() bool { return len(c.functionCallNameString) > 0 } +// MMProjFileName returns the filename of the MMProj file +// If the MMProj is a URL, it will return the MD5 of the URL which is the filename +func (c *BackendConfig) MMProjFileName() string { + modelURL := downloader.ConvertURL(c.MMProj) + if downloader.LooksLikeURL(modelURL) { + return utils.MD5(modelURL) + } + + return c.MMProj +} + +func (c *BackendConfig) IsMMProjURL() bool { + return downloader.LooksLikeURL(downloader.ConvertURL(c.MMProj)) +} + +func (c *BackendConfig) IsModelURL() bool { + return downloader.LooksLikeURL(downloader.ConvertURL(c.Model)) +} + +// ModelFileName returns the filename of the model +// If the model is a URL, it will return the MD5 of the URL which is the filename +func (c *BackendConfig) ModelFileName() 
string { + modelURL := downloader.ConvertURL(c.Model) + if downloader.LooksLikeURL(modelURL) { + return utils.MD5(modelURL) + } + + return c.Model +} + func (c *BackendConfig) FunctionToCall() string { if c.functionCallNameString != "" && c.functionCallNameString != "none" && c.functionCallNameString != "auto" { @@ -532,16 +562,13 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error { } } - modelURL := config.PredictionOptions.Model - modelURL = downloader.ConvertURL(modelURL) - - if downloader.LooksLikeURL(modelURL) { - // md5 of model name - md5Name := utils.MD5(modelURL) - + // If the model is an URL, expand it, and download the file + if config.IsModelURL() { + modelFileName := config.ModelFileName() + modelURL := downloader.ConvertURL(config.Model) // check if file exists - if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) { - err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", 0, 0, status) + if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) { + err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status) if err != nil { return err } @@ -549,9 +576,27 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error { cc := cl.configs[i] c := &cc - c.PredictionOptions.Model = md5Name + c.PredictionOptions.Model = modelFileName cl.configs[i] = *c } + + if config.IsMMProjURL() { + modelFileName := config.MMProjFileName() + modelURL := downloader.ConvertURL(config.MMProj) + // check if file exists + if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) { + err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status) + if err != nil { + return err + } + } + + cc := cl.configs[i] + c := &cc + c.MMProj = modelFileName + cl.configs[i] = *c + } + if cl.configs[i].Name != "" { glamText(fmt.Sprintf("**Model name**: _%s_", cl.configs[i].Name)) } @@ -586,7 +631,8 @@ func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...C } for _, file := range files { // Skip templates, YAML and .keep files - if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") { + if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") || + strings.HasPrefix(file.Name(), ".") { continue } c, err := ReadBackendConfig(filepath.Join(path, file.Name()), opts...) 
diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index 6edbd23d..8093b042 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -13,7 +13,7 @@ const ( NoImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg" ) -func DoneProgress(uid string) string { +func DoneProgress(uid, text string) string { return elem.Div( attrs.Props{}, elem.H3( @@ -23,7 +23,7 @@ func DoneProgress(uid string) string { "tabindex": "-1", "autofocus": "", }, - elem.Text("Installation completed"), + elem.Text(text), ), ).Render() } @@ -60,7 +60,7 @@ func ProgressBar(progress string) string { ).Render() } -func StartProgressBar(uid, progress string) string { +func StartProgressBar(uid, progress, text string) string { if progress == "" { progress = "0" } @@ -77,7 +77,7 @@ func StartProgressBar(uid, progress string) string { "tabindex": "-1", "autofocus": "", }, - elem.Text("Installing"), + elem.Text(text), // This is a simple example of how to use the HTMLX library to create a progress bar that updates every 600ms. elem.Div(attrs.Props{ "hx-get": "/browse/job/progress/" + uid, @@ -106,14 +106,33 @@ func cardSpan(text, icon string) elem.Node { func ListModels(models []*gallery.GalleryModel, installing *xsync.SyncedMap[string, string]) string { //StartProgressBar(uid, "0") modelsElements := []elem.Node{} - span := func(s string) elem.Node { - return elem.Span( + // span := func(s string) elem.Node { + // return elem.Span( + // attrs.Props{ + // "class": "float-right inline-block bg-green-500 text-white py-1 px-3 rounded-full text-xs", + // }, + // elem.Text(s), + // ) + // } + deleteButton := func(m *gallery.GalleryModel) elem.Node { + return elem.Button( attrs.Props{ - "class": "float-right inline-block bg-green-500 text-white py-1 px-3 rounded-full text-xs", + "data-twe-ripple-init": "", + "data-twe-ripple-color": "light", + "class": "float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "hx-swap": "outerHTML", + // post the Model ID as param + "hx-post": "/browse/delete/model/" + m.Name, }, - elem.Text(s), + elem.I( + attrs.Props{ + "class": "fa-solid fa-cancel pr-2", + }, + ), + elem.Text("Delete"), ) } + installButton := func(m *gallery.GalleryModel) elem.Node { return elem.Button( attrs.Props{ @@ -202,10 +221,14 @@ func ListModels(models []*gallery.GalleryModel, installing *xsync.SyncedMap[stri elem.If( currentlyInstalling, elem.Node( // If currently installing, show progress bar - elem.Raw(StartProgressBar(installing.Get(galleryID), "0")), + elem.Raw(StartProgressBar(installing.Get(galleryID), "0", "Installing")), ), // Otherwise, show install button (if not installed) or display "Installed" elem.If(m.Installed, - span("Installed"), + //elem.Node(elem.Div( + // attrs.Props{}, + // span("Installed"), deleteButton(m), + // )), + deleteButton(m), installButton(m), ), ), diff --git a/core/http/endpoints/localai/gallery.go b/core/http/endpoints/localai/gallery.go index b693e7c3..a74a2bb9 100644 --- a/core/http/endpoints/localai/gallery.go +++ b/core/http/endpoints/localai/gallery.go @@ -74,6 +74,27 @@ func (mgs *ModelGalleryEndpointService) 
ApplyModelGalleryEndpoint() func(c *fibe
 	}
 }
 
+func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		modelName := c.Params("name")
+
+		mgs.galleryApplier.C <- gallery.GalleryOp{
+			Delete:      true,
+			GalleryName: modelName,
+		}
+
+		uuid, err := uuid.NewUUID()
+		if err != nil {
+			return err
+		}
+
+		return c.JSON(struct {
+			ID        string `json:"uuid"`
+			StatusURL string `json:"status"`
+		}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
+	}
+}
+
 func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go
index 6415c894..138babbe 100644
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -23,6 +23,8 @@ func RegisterLocalAIRoutes(app *fiber.App,
 	modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
 	app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
+	app.Post("/models/delete/:name", auth, modelGalleryEndpointService.DeleteModelGalleryEndpoint())
+
 	app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
 	app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint())
 	app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint())
diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go
index b63b1870..2b8c6b95 100644
--- a/core/http/routes/ui.go
+++ b/core/http/routes/ui.go
@@ -66,6 +66,12 @@ func RegisterUIRoutes(app *fiber.App,
 		return c.SendString(elements.ListModels(filteredModels, installingModels))
 	})
 
+	/*
+
+		Install routes
+
+	*/
+
 	// This route is used when the "Install" button is pressed, we submit here a new job to the gallery service
 	// https://htmx.org/examples/progress-bar/
 	app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error {
@@ -89,7 +95,33 @@ func RegisterUIRoutes(app *fiber.App,
 			galleryService.C <- op
 		}()
 
-		return c.SendString(elements.StartProgressBar(uid, "0"))
+		return c.SendString(elements.StartProgressBar(uid, "0", "Installation"))
+	})
+
+	// This route is used when the "Delete" button is pressed, we submit here a new job to the gallery service
+	// https://htmx.org/examples/progress-bar/
+	app.Post("/browse/delete/model/:id", auth, func(c *fiber.Ctx) error {
+		galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests!
+ + id, err := uuid.NewUUID() + if err != nil { + return err + } + + uid := id.String() + + installingModels.Set(galleryID, uid) + + op := gallery.GalleryOp{ + Id: uid, + Delete: true, + GalleryName: galleryID, + } + go func() { + galleryService.C <- op + }() + + return c.SendString(elements.StartProgressBar(uid, "0", "Deletion")) }) // Display the job current progress status @@ -118,12 +150,20 @@ func RegisterUIRoutes(app *fiber.App, // this route is hit when the job is done, and we display the // final state (for now just displays "Installation completed") app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error { + + status := galleryService.GetStatus(c.Params("uid")) + for _, k := range installingModels.Keys() { if installingModels.Get(k) == c.Params("uid") { installingModels.Delete(k) } } - return c.SendString(elements.DoneProgress(c.Params("uid"))) + displayText := "Installation completed" + if status.Deletion { + displayText = "Deletion completed" + } + + return c.SendString(elements.DoneProgress(c.Params("uid"), displayText)) }) } diff --git a/core/services/gallery.go b/core/services/gallery.go index b068abbb..6a54e38c 100644 --- a/core/services/gallery.go +++ b/core/services/gallery.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "os" + "path/filepath" "strings" "sync" @@ -84,18 +85,47 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader } var err error - // if the request contains a gallery name, we apply the gallery from the gallery list - if op.GalleryName != "" { - if strings.Contains(op.GalleryName, "@") { - err = gallery.InstallModelFromGallery(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback) - } else { - err = gallery.InstallModelFromGalleryByName(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback) + + // delete a model + if op.Delete { + modelConfig := &config.BackendConfig{} + // Galleryname is the name of the model in this case + dat, err := os.ReadFile(filepath.Join(g.modelPath, op.GalleryName+".yaml")) + if err != nil { + updateError(err) + continue } - } else if op.ConfigURL != "" { - startup.PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL) - err = cl.Preload(g.modelPath) + err = yaml.Unmarshal(dat, modelConfig) + if err != nil { + updateError(err) + continue + } + + files := []string{} + // Remove the model from the config + if modelConfig.Model != "" { + files = append(files, modelConfig.ModelFileName()) + } + + if modelConfig.MMProj != "" { + files = append(files, modelConfig.MMProjFileName()) + } + + err = gallery.DeleteModelFromSystem(g.modelPath, op.GalleryName, files) } else { - err = prepareModel(g.modelPath, op.Req, cl, progressCallback) + // if the request contains a gallery name, we apply the gallery from the gallery list + if op.GalleryName != "" { + if strings.Contains(op.GalleryName, "@") { + err = gallery.InstallModelFromGallery(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback) + } else { + err = gallery.InstallModelFromGalleryByName(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback) + } + } else if op.ConfigURL != "" { + startup.PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL) + err = cl.Preload(g.modelPath) + } else { + err = prepareModel(g.modelPath, op.Req, cl, progressCallback) + } } if err != nil { @@ -116,7 +146,12 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader continue } - g.UpdateStatus(op.Id, &gallery.GalleryOpStatus{Processed: true, Message: "completed", 
Progress: 100})
+		g.UpdateStatus(op.Id,
+			&gallery.GalleryOpStatus{
+				Deletion:  op.Delete,
+				Processed: true,
+				Message:   "completed",
+				Progress:  100})
 		}
 	}
 	}()
diff --git a/pkg/gallery/gallery.go b/pkg/gallery/gallery.go
index c4575817..d90ce4d9 100644
--- a/pkg/gallery/gallery.go
+++ b/pkg/gallery/gallery.go
@@ -1,6 +1,7 @@
 package gallery
 
 import (
+	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
@@ -184,3 +185,48 @@ func getGalleryModels(gallery Gallery, basePath string) ([]*GalleryModel, error)
 	}
 	return models, nil
 }
+
+func DeleteModelFromSystem(basePath string, name string, additionalFiles []string) error {
+	// os.PathSeparator is not allowed in model names. Replace them with "__" to avoid conflicts with file paths.
+	name = strings.ReplaceAll(name, string(os.PathSeparator), "__")
+
+	configFile := filepath.Join(basePath, fmt.Sprintf("%s.yaml", name))
+
+	galleryFile := filepath.Join(basePath, galleryFileName(name))
+
+	var err error
+	// Delete all the files associated with the model
+	// read the model config
+	galleryconfig, err := ReadConfigFile(galleryFile)
+	if err != nil {
+		log.Error().Err(err).Msgf("failed to read gallery file %s", galleryFile)
+	}
+
+	// Remove the files listed in the gallery config
+	if galleryconfig != nil {
+		for _, f := range galleryconfig.Files {
+			fullPath := filepath.Join(basePath, f.Filename)
+			log.Debug().Msgf("Removing file %s", fullPath)
+			if e := os.Remove(fullPath); e != nil {
+				err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f.Filename, e))
+			}
+		}
+	}
+
+	for _, f := range additionalFiles {
+		fullPath := filepath.Join(basePath, f)
+		log.Debug().Msgf("Removing additional file %s", fullPath)
+		if e := os.Remove(fullPath); e != nil {
+			err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f, e))
+		}
+	}
+
+	log.Debug().Msgf("Removing model config file %s", configFile)
+
+	// Delete the model config file
+	if e := os.Remove(configFile); e != nil {
+		err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", configFile, e))
+	}
+
+	return err
+}
diff --git a/pkg/gallery/gallery_suite_test.go b/pkg/gallery/gallery_suite_test.go
index 44256bc2..bf13cac9 100644
--- a/pkg/gallery/gallery_suite_test.go
+++ b/pkg/gallery/gallery_suite_test.go
@@ -1,6 +1,7 @@
 package gallery_test
 
 import (
+	"os"
 	"testing"
 
 	.
"github.com/onsi/ginkgo/v2" @@ -11,3 +12,9 @@ func TestGallery(t *testing.T) { RegisterFailHandler(Fail) RunSpecs(t, "Gallery test suite") } + +var _ = BeforeSuite(func() { + if os.Getenv("FIXTURES") == "" { + Fail("FIXTURES env var not set") + } +}) diff --git a/pkg/gallery/models.go b/pkg/gallery/models.go index 2ab4c832..1fc6c0a2 100644 --- a/pkg/gallery/models.go +++ b/pkg/gallery/models.go @@ -178,5 +178,20 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides log.Debug().Msgf("Written config file %s", configFilePath) } - return nil + // Save the model gallery file for further reference + modelFile := filepath.Join(basePath, galleryFileName(name)) + data, err := yaml.Marshal(config) + if err != nil { + return err + } + + log.Debug().Msgf("Written gallery file %s", modelFile) + + return os.WriteFile(modelFile, data, 0600) + + //return nil +} + +func galleryFileName(name string) string { + return "._gallery_" + name + ".yaml" } diff --git a/pkg/gallery/models_test.go b/pkg/gallery/models_test.go index 6eb63128..bfc2b9a6 100644 --- a/pkg/gallery/models_test.go +++ b/pkg/gallery/models_test.go @@ -1,6 +1,7 @@ package gallery_test import ( + "errors" "os" "path/filepath" @@ -11,6 +12,7 @@ import ( ) var _ = Describe("Model test", func() { + Context("Downloading", func() { It("applies model correctly", func() { tempdir, err := os.MkdirTemp("", "test") @@ -80,6 +82,19 @@ var _ = Describe("Model test", func() { Expect(err).ToNot(HaveOccurred()) Expect(len(models)).To(Equal(1)) Expect(models[0].Installed).To(BeTrue()) + + // delete + err = DeleteModelFromSystem(tempdir, "bert", []string{}) + Expect(err).ToNot(HaveOccurred()) + + models, err = AvailableGalleryModels(galleries, tempdir) + Expect(err).ToNot(HaveOccurred()) + Expect(len(models)).To(Equal(1)) + Expect(models[0].Installed).To(BeFalse()) + + _, err = os.Stat(filepath.Join(tempdir, "bert.yaml")) + Expect(err).To(HaveOccurred()) + Expect(errors.Is(err, os.ErrNotExist)).To(BeTrue()) }) It("renames model correctly", func() { diff --git a/pkg/gallery/op.go b/pkg/gallery/op.go index 73d748bf..4637820a 100644 --- a/pkg/gallery/op.go +++ b/pkg/gallery/op.go @@ -4,12 +4,14 @@ type GalleryOp struct { Id string GalleryName string ConfigURL string + Delete bool Req GalleryModel Galleries []Gallery } type GalleryOpStatus struct { + Deletion bool `json:"deletion"` // Deletion is true if the operation is a deletion FileName string `json:"file_name"` Error error `json:"error"` Processed bool `json:"processed"` diff --git a/pkg/model/loader.go b/pkg/model/loader.go index 1b5c9aa0..2d6b3acb 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -96,7 +96,13 @@ func (ml *ModelLoader) ListModels() ([]string, error) { models := []string{} for _, file := range files { // Skip templates, YAML, .keep, .json, and .DS_Store files - TODO: as this list grows, is there a more efficient method? 
- if strings.HasSuffix(file.Name(), ".tmpl") || strings.HasSuffix(file.Name(), ".keep") || strings.HasSuffix(file.Name(), ".yaml") || strings.HasSuffix(file.Name(), ".yml") || strings.HasSuffix(file.Name(), ".json") || strings.HasSuffix(file.Name(), ".DS_Store") { + if strings.HasSuffix(file.Name(), ".tmpl") || + strings.HasSuffix(file.Name(), ".keep") || + strings.HasSuffix(file.Name(), ".yaml") || + strings.HasSuffix(file.Name(), ".yml") || + strings.HasSuffix(file.Name(), ".json") || + strings.HasSuffix(file.Name(), ".DS_Store") || + strings.HasPrefix(file.Name(), ".") { continue } From 067489364934919e99647c5c966864d8c43468f1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Apr 2024 23:56:10 +0200 Subject: [PATCH 0412/2895] Update .env Signed-off-by: Ettore Di Giacinto --- .env | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.env b/.env index 35d4f2d7..ea2d4e35 100644 --- a/.env +++ b/.env @@ -10,7 +10,7 @@ # ## Define galleries. ## models will to install will be visible in `/models/available` -# LOCALAI_GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}] +# LOCALAI_GALLERIES=[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}] ## CORS settings # LOCALAI_CORS=true @@ -86,4 +86,4 @@ # LOCALAI_WATCHDOG_BUSY=true # # Time in duration format (e.g. 1h30m) after which a backend is considered busy -# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m \ No newline at end of file +# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m From 5fef3b0ff15903d6f4f81bcdfb64632934d8c5cc Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 29 Apr 2024 00:32:45 +0200 Subject: [PATCH 0413/2895] :arrow_up: Update ggerganov/whisper.cpp (#2177) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5980fc3c..60cd3f24 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=858452d58dba3acdc3431c9bced2bb8cfd9bf418 +WHISPER_CPP_VERSION?=22b6598cc9f1454567efa0d816fdc57637243999 # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From 74d903acca4729e6346405ba5098d19c7264960c Mon Sep 17 00:00:00 2001 From: Sijia Lu <46901221+LeonSijiaLu@users.noreply.github.com> Date: Sun, 28 Apr 2024 22:21:51 -0400 Subject: [PATCH 0414/2895] [Documentations] Removed invalid numberings from `troubleshooting mac` (#2174) * updated troubleshooting mac Signed-off-by: LeonSijiaLu * prepend - Signed-off-by: LeonSijiaLu --------- Signed-off-by: LeonSijiaLu --- docs/content/docs/getting-started/build.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/content/docs/getting-started/build.md b/docs/content/docs/getting-started/build.md index 7e585ab3..2b69ef4e 100644 --- a/docs/content/docs/getting-started/build.md +++ b/docs/content/docs/getting-started/build.md @@ -173,10 +173,11 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso }' ``` -#### Troublshooting mac +#### Troubleshooting mac -1. If you encounter errors regarding a missing utility metal, install `Xcode` from the App Store. -2. 
After the installation of Xcode, if you receive a xcrun error `'xcrun: error: unable to find utility "metal", not a developer tool or in PATH'`. You might have installed the Xcode command line tools before installing Xcode, the former one is pointing to an incomplete SDK.
+- If you encounter errors regarding a missing utility metal, install `Xcode` from the App Store.
+
+- After the installation of Xcode, if you receive an xcrun error `'xcrun: error: unable to find utility "metal", not a developer tool or in PATH'`, you might have installed the Xcode command line tools before installing Xcode; the former points to an incomplete SDK.
 
 ```
 # print /Library/Developer/CommandLineTools, if command line tools were installed in advance
 xcode-select --print-path
 
 # set the active developer directory
 sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer
 ```
 
-3. If completions are slow, ensure that `gpu-layers` in your model yaml matches the number of layers from the model in use (or simply use a high number such as 256).
-4. If you a get a compile error: `error: only virtual member functions can be marked 'final'`, reinstall all the necessary brew packages, clean the build, and try again.
+- If completions are slow, ensure that `gpu-layers` in your model yaml matches the number of layers from the model in use (or simply use a high number such as 256).
+
+- If you get a compile error: `error: only virtual member functions can be marked 'final'`, reinstall all the necessary brew packages, clean the build, and try again.
 
 ```
 # reinstall build dependencies

From 982dc6a2bd2f509a64bee96a739acad670e56503 Mon Sep 17 00:00:00 2001
From: Dave Date: Sun, 28 Apr 2024 23:55:29 -0400
Subject: [PATCH 0415/2895] fix: github bump_docs.sh regex to drop emoji and other text (#2180)

fix: bump_docs regex

Signed-off-by: Dave Lee
---
 .github/bump_docs.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/bump_docs.sh b/.github/bump_docs.sh
index 169022aa..e69d3824 100755
--- a/.github/bump_docs.sh
+++ b/.github/bump_docs.sh
@@ -2,6 +2,6 @@ set -xe
 
 REPO=$1
 
-LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.name')
+LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.tag_name')
 
 cat <<< $(jq ".version = \"$LATEST_TAG\"" docs/data/version.json) > docs/data/version.json

From b7ea9602f5d74c73c56961b853bc99c23a99d1fb Mon Sep 17 00:00:00 2001
From: fakezeta Date: Mon, 29 Apr 2024 15:11:09 +0200
Subject: [PATCH 0416/2895] fix: undefined symbol: iJIT_NotifyEvent in import torch ##2153 (#2179)

* add extra index to Intel repository

* Update install.sh
---
 backend/python/common-env/transformers/install.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh
index 30ec0de0..ef768bc7 100644
--- a/backend/python/common-env/transformers/install.sh
+++ b/backend/python/common-env/transformers/install.sh
@@ -26,7 +26,7 @@ if [ -d "/opt/intel" ]; then
     # Intel GPU: If the directory exists, we assume we are using the intel image
     # (no conda env)
     # https://github.com/intel/intel-extension-for-pytorch/issues/538
-    pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed optimum[openvino]
+    pip install torch==2.1.0.post0 torchvision==0.16.0.post0 torchaudio==2.1.0.post0 intel-extension-for-pytorch==2.1.20+xpu oneccl_bind_pt==2.1.200+xpu intel-extension-for-transformers datasets sentencepiece tiktoken
neural_speed optimum[openvino] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ fi # If we didn't skip conda, activate the environment From 11c48a0004022e21f12d2ffedc16534bf43acf8b Mon Sep 17 00:00:00 2001 From: Dave Date: Mon, 29 Apr 2024 09:11:42 -0400 Subject: [PATCH 0417/2895] fix: security scanner warning noise: error handlers part 2 (#2145) check off a few more error handlers Signed-off-by: Dave Lee --- core/cli/models.go | 6 +++++- core/cli/transcript.go | 8 +++++++- core/cli/tts.go | 8 +++++++- core/startup/config_file_watcher.go | 14 ++++++++++---- core/startup/startup.go | 5 ++++- pkg/functions/functions.go | 12 ++++++++++-- pkg/functions/parse.go | 10 ++++++++-- pkg/model/initializers.go | 17 ++++++++++++++--- pkg/model/loader.go | 5 ++++- pkg/model/process.go | 16 ++++++++++------ tests/integration/stores_test.go | 5 +++-- 11 files changed, 82 insertions(+), 24 deletions(-) diff --git a/core/cli/models.go b/core/cli/models.go index 6615e21d..5bbb60e6 100644 --- a/core/cli/models.go +++ b/core/cli/models.go @@ -64,7 +64,11 @@ func (mi *ModelsInstall) Run(ctx *Context) error { progressbar.OptionClearOnFinish(), ) progressCallback := func(fileName string, current string, total string, percentage float64) { - progressBar.Set(int(percentage * 10)) + v := int(percentage * 10) + err := progressBar.Set(v) + if err != nil { + log.Error().Err(err).Str("filename", fileName).Int("value", v).Msg("error while updating progress bar") + } } err := gallery.InstallModelFromGallery(galleries, modelName, mi.ModelsPath, gallery.GalleryModel{}, progressCallback) if err != nil { diff --git a/core/cli/transcript.go b/core/cli/transcript.go index 9f36a77c..1f2f779a 100644 --- a/core/cli/transcript.go +++ b/core/cli/transcript.go @@ -8,6 +8,7 @@ import ( "github.com/go-skynet/LocalAI/core/backend" "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/pkg/model" + "github.com/rs/zerolog/log" ) type TranscriptCMD struct { @@ -41,7 +42,12 @@ func (t *TranscriptCMD) Run(ctx *Context) error { c.Threads = &t.Threads - defer ml.StopAllGRPC() + defer func() { + err := ml.StopAllGRPC() + if err != nil { + log.Error().Err(err).Msg("unable to stop all grpc processes") + } + }() tr, err := backend.ModelTranscription(t.Filename, t.Language, ml, c, opts) if err != nil { diff --git a/core/cli/tts.go b/core/cli/tts.go index 1d8fd3a3..d4bd2553 100644 --- a/core/cli/tts.go +++ b/core/cli/tts.go @@ -10,6 +10,7 @@ import ( "github.com/go-skynet/LocalAI/core/backend" "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/pkg/model" + "github.com/rs/zerolog/log" ) type TTSCMD struct { @@ -40,7 +41,12 @@ func (t *TTSCMD) Run(ctx *Context) error { } ml := model.NewModelLoader(opts.ModelPath) - defer ml.StopAllGRPC() + defer func() { + err := ml.StopAllGRPC() + if err != nil { + log.Error().Err(err).Msg("unable to stop all grpc processes") + } + }() options := config.BackendConfig{} options.SetDefaults() diff --git a/core/startup/config_file_watcher.go b/core/startup/config_file_watcher.go index 6bbb367f..259446f1 100644 --- a/core/startup/config_file_watcher.go +++ b/core/startup/config_file_watcher.go @@ -31,8 +31,14 @@ func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler handlers: make(map[string]fileHandler), appConfig: appConfig, } - c.Register("api_keys.json", readApiKeysJson(*appConfig), true) - c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true) + err := c.Register("api_keys.json", 
readApiKeysJson(*appConfig), true) + if err != nil { + log.Error().Err(err).Str("file", "api_keys.json").Msg("unable to register config file handler") + } + err = c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true) + if err != nil { + log.Error().Err(err).Str("file", "external_backends.json").Msg("unable to register config file handler") + } return c } @@ -118,8 +124,8 @@ func (c *configFileHandler) Watch() error { } // TODO: When we institute graceful shutdown, this should be called -func (c *configFileHandler) Stop() { - c.watcher.Close() +func (c *configFileHandler) Stop() error { + return c.watcher.Close() } func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler { diff --git a/core/startup/startup.go b/core/startup/startup.go index 17bbf9f5..e5660f4c 100644 --- a/core/startup/startup.go +++ b/core/startup/startup.go @@ -100,7 +100,10 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode go func() { <-options.Context.Done() log.Debug().Msgf("Context canceled, shutting down") - ml.StopAllGRPC() + err := ml.StopAllGRPC() + if err != nil { + log.Error().Err(err).Msg("error while stopping all grpc backends") + } }() if options.WatchDog { diff --git a/pkg/functions/functions.go b/pkg/functions/functions.go index d75a2ee3..f5e37d75 100644 --- a/pkg/functions/functions.go +++ b/pkg/functions/functions.go @@ -2,6 +2,8 @@ package functions import ( "encoding/json" + + "github.com/rs/zerolog/log" ) type Function struct { @@ -30,8 +32,14 @@ func (f Functions) ToJSONStructure() JSONFunctionStructure { prop := map[string]interface{}{} defsD := map[string]interface{}{} - json.Unmarshal(dat, &prop) - json.Unmarshal(dat2, &defsD) + err := json.Unmarshal(dat, &prop) + if err != nil { + log.Error().Err(err).Msg("error unmarshalling dat") + } + err = json.Unmarshal(dat2, &defsD) + if err != nil { + log.Error().Err(err).Msg("error unmarshalling dat2") + } if js.Defs == nil { js.Defs = defsD } diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go index 5324e8c6..26312560 100644 --- a/pkg/functions/parse.go +++ b/pkg/functions/parse.go @@ -59,7 +59,10 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC if multipleResults { ss := []map[string]interface{}{} s := utils.EscapeNewLines(llmresult) - json.Unmarshal([]byte(s), &ss) + err := json.Unmarshal([]byte(s), &ss) + if err != nil { + log.Error().Err(err).Str("escapedLLMResult", s).Msg("multiple results: unable to unmarshal llm result") + } log.Debug().Msgf("Function return: %s %+v", s, ss) for _, s := range ss { @@ -83,7 +86,10 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC ss := map[string]interface{}{} // This prevent newlines to break JSON parsing for clients s := utils.EscapeNewLines(llmresult) - json.Unmarshal([]byte(s), &ss) + err := json.Unmarshal([]byte(s), &ss) + if err != nil { + log.Error().Err(err).Str("escapedLLMResult", s).Msg("unable to unmarshal llm result") + } log.Debug().Msgf("Function return: %s %+v", s, ss) // The grammar defines the function name as "function", while OpenAI returns "name" diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 5d9808a4..5a65d01f 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -70,7 +70,10 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string // If no specific model path is set for transformers/HF, set it to the model path for _, env := range []string{"HF_HOME", 
"TRANSFORMERS_CACHE", "HUGGINGFACE_HUB_CACHE"} { if os.Getenv(env) == "" { - os.Setenv(env, ml.ModelPath) + err := os.Setenv(env, ml.ModelPath) + if err != nil { + log.Error().Err(err).Str("name", env).Str("modelPath", ml.ModelPath).Msg("unable to set environment variable to modelPath") + } } } @@ -184,8 +187,13 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e if o.singleActiveBackend { ml.mu.Lock() log.Debug().Msgf("Stopping all backends except '%s'", o.model) - ml.StopAllExcept(o.model) + err := ml.StopAllExcept(o.model) ml.mu.Unlock() + if err != nil { + log.Error().Err(err).Str("keptModel", o.model).Msg("error while shutting down all backends except for the keptModel") + return nil, err + } + } var backendToConsume string @@ -224,7 +232,10 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) { // If we can have only one backend active, kill all the others (except external backends) if o.singleActiveBackend { log.Debug().Msgf("Stopping all backends except '%s'", o.model) - ml.StopAllExcept(o.model) + err := ml.StopAllExcept(o.model) + if err != nil { + log.Error().Err(err).Str("keptModel", o.model).Msg("error while shutting down all backends except for the keptModel - greedyloader continuing") + } } ml.mu.Unlock() diff --git a/pkg/model/loader.go b/pkg/model/loader.go index 2d6b3acb..8bf9da5a 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -174,7 +174,10 @@ func (ml *ModelLoader) CheckIsLoaded(s string) ModelAddress { if !ml.grpcProcesses[s].IsAlive() { log.Debug().Msgf("GRPC Process is not responding: %s", s) // stop and delete the process, this forces to re-load the model and re-create again the service - ml.deleteProcess(s) + err := ml.deleteProcess(s) + if err != nil { + log.Error().Err(err).Str("process", s).Msg("error stopping process") + } return "" } } diff --git a/pkg/model/process.go b/pkg/model/process.go index 08822fd9..ff3b12cc 100644 --- a/pkg/model/process.go +++ b/pkg/model/process.go @@ -1,6 +1,7 @@ package model import ( + "errors" "fmt" "os" "os/signal" @@ -14,8 +15,8 @@ import ( "github.com/rs/zerolog/log" ) -func (ml *ModelLoader) StopAllExcept(s string) { - ml.StopGRPC(func(id string, p *process.Process) bool { +func (ml *ModelLoader) StopAllExcept(s string) error { + return ml.StopGRPC(func(id string, p *process.Process) bool { if id != s { for ml.models[id].GRPC(false, ml.wd).IsBusy() { log.Debug().Msgf("%s busy. 
Waiting.", id) @@ -43,16 +44,19 @@ func includeAllProcesses(_ string, _ *process.Process) bool { return true } -func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) { +func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) error { + var err error = nil for k, p := range ml.grpcProcesses { if filter(k, p) { - ml.deleteProcess(k) + e := ml.deleteProcess(k) + err = errors.Join(err, e) } } + return err } -func (ml *ModelLoader) StopAllGRPC() { - ml.StopGRPC(includeAllProcesses) +func (ml *ModelLoader) StopAllGRPC() error { + return ml.StopGRPC(includeAllProcesses) } func (ml *ModelLoader) GetGRPCPID(id string) (int, error) { diff --git a/tests/integration/stores_test.go b/tests/integration/stores_test.go index 54d0844c..ec67af78 100644 --- a/tests/integration/stores_test.go +++ b/tests/integration/stores_test.go @@ -63,8 +63,9 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs" }) AfterEach(func() { - sl.StopAllGRPC() - err := os.RemoveAll(tmpdir) + err := sl.StopAllGRPC() + Expect(err).ToNot(HaveOccurred()) + err = os.RemoveAll(tmpdir) Expect(err).ToNot(HaveOccurred()) }) From 93ca56086e7b00cc4e72c2cd0222eca061d22671 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Mon, 29 Apr 2024 08:17:09 -0500 Subject: [PATCH 0418/2895] update go-tinydream to latest commit (#2182) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 60cd3f24..0069fb8b 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759 STABLEDIFFUSION_VERSION?=433ea6d9b64d9d08067324a757ef07040ea29568 # tinydream version -TINYDREAM_VERSION?=22a12a4bc0ac5455856f28f3b771331a551a4293 +TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057 export BUILD_TYPE?= export STABLE_BUILD_TYPE?=$(BUILD_TYPE) From ea13863221b7d4ac9dbad636730c2c8599984216 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 29 Apr 2024 18:17:39 +0200 Subject: [PATCH 0419/2895] models(gallery): add llama3-32k (#2183) Signed-off-by: Ettore Di Giacinto --- gallery/chatml.yaml | 41 +++++++++++++++++++++++++++++++++++++++++ gallery/index.yaml | 19 +++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 gallery/chatml.yaml diff --git a/gallery/chatml.yaml b/gallery/chatml.yaml new file mode 100644 index 00000000..e27fdab8 --- /dev/null +++ b/gallery/chatml.yaml @@ -0,0 +1,41 @@ +--- +name: "chatml" + +config_file: | + mmap: true + template: + chat_message: | + <|im_start|>{{ .RoleName }} + {{- if .FunctionCall }} + Function call: + {{- else if eq .RoleName "tool" }} + Function response: + {{- end }} + {{- if .Content}} + {{.Content }} + {{- end }} + {{- if .FunctionCall}} + {{toJson .FunctionCall}} + {{- end }} + <|im_end|> + function: | + <|im_start|>system + You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + For each function call return a json object with function name and arguments + <|im_end|> + {{.Input -}} + <|im_start|>assistant + + chat: | + {{.Input -}} + <|im_start|>assistant + completion: | + {{.Input}} + context_size: 4096 + f16: true + stopwords: + - <|im_end|> + - diff --git a/gallery/index.yaml b/gallery/index.yaml index 012a1ecb..da0c9c59 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -412,6 +412,25 @@ - filename: dolphin-2.9-llama3-8b-q6_K.gguf sha256: 8aac72a0bd72c075ba7be1aa29945e47b07d39cd16be9a80933935f51b57fb32 uri: huggingface://cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q6_K.gguf +- url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + name: "llama-3-8b-instruct-dpo-v0.3-32k" + license: llama3 + urls: + - https://huggingface.co/MaziyarPanahi/Llama-3-8B-Instruct-DPO-v0.3-32k-GGUF + tags: + - llm + - gguf + - gpu + - cpu + - llama3 + overrides: + context_size: 32768 + parameters: + model: Llama-3-8B-Instruct-DPO-v0.3.Q4_K_M.gguf + files: + - filename: Llama-3-8B-Instruct-DPO-v0.3.Q4_K_M.gguf + sha256: 694c55b5215d03e59626cd4292076eaf31610ef27ba04737166766baa75d889f + uri: huggingface://MaziyarPanahi/Llama-3-8B-Instruct-DPO-v0.3-32k-GGUF/Llama-3-8B-Instruct-DPO-v0.3.Q4_K_M.gguf ## LLama2 and derivatives ### Start Fimbulvetr - &vicuna-chat From baff5ff8c262744aab2793809a2d764d266ac8bd Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 29 Apr 2024 18:17:47 +0200 Subject: [PATCH 0420/2895] models(gallery): add openvino models (#2184) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 46 ++++++++++++++++++++++++++++++++++++++++++- gallery/openvino.yaml | 12 +++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 gallery/openvino.yaml diff --git a/gallery/index.yaml b/gallery/index.yaml index da0c9c59..e510e97e 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -778,7 +778,51 @@ - filename: "codellama-7b.Q4_0.gguf" sha256: "33052f6dd41436db2f83bd48017b6fff8ce0184e15a8a227368b4230f1da97b5" uri: "huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_0.gguf" - +### START OpenVINO +- &openvino + url: "github:mudler/LocalAI/gallery/openvino.yaml@master" + name: "openvino-llama-3-8b-instruct-ov-int8" + license: llama3 + urls: + - https://huggingface.co/fakezeta/llama-3-8b-instruct-ov-int8 + overrides: + parameters: + model: fakezeta/llama-3-8b-instruct-ov-int8 + stopwords: + - "<|eot_id|>" + - "<|end_of_text|>" + tags: + - llm + - openvino + - gpu + - llama3 + - cpu +- <<: *openvino + name: "openvino-phi3" + urls: + - https://huggingface.co/fakezeta/Phi-3-mini-128k-instruct-ov-int8 + overrides: + context_size: 131072 + parameters: + model: fakezeta/Phi-3-mini-128k-instruct-ov-int8 + stopwords: + - <|end|> +- <<: *openvino + name: "openvino-starling-lm-7b-beta-openvino-int8" + urls: + - https://huggingface.co/fakezeta/Starling-LM-7B-beta-openvino-int8 + overrides: + context_size: 8192 + parameters: + model: fakezeta/Starling-LM-7B-beta-openvino-int8 +- <<: *openvino + name: "openvino-wizardlm2" + urls: + - https://huggingface.co/fakezeta/Not-WizardLM-2-7B-ov-int8 + overrides: + context_size: 8192 + parameters: + model: fakezeta/Not-WizardLM-2-7B-ov-int8 ### START Embeddings - &sentencentransformers description: | diff --git a/gallery/openvino.yaml b/gallery/openvino.yaml new file mode 100644 
index 00000000..b30b2a85
--- /dev/null
+++ b/gallery/openvino.yaml
@@ -0,0 +1,12 @@
+---
+name: openvino
+
+config_file: |
+  backend: transformers
+  context_size: 8192
+  type: OVModelForCausalLM
+  template:
+    use_tokenizer_template: true
+  stopwords:
+  - "<|eot_id|>"
+  - "<|end_of_text|>"

From 147440b39b9ed5d0542ea79920662aaddb3d0935 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto Date: Mon, 29 Apr 2024 18:31:50 +0200
Subject: [PATCH 0421/2895] docs: add reference for concurrent requests

Signed-off-by: Ettore Di Giacinto
---
 docs/content/docs/advanced/advanced-usage.md | 26 +++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md
index cbf7dba3..085606e5 100644
--- a/docs/content/docs/advanced/advanced-usage.md
+++ b/docs/content/docs/advanced/advanced-usage.md
@@ -498,4 +498,28 @@ When using the `-core` container image it is possible to prepare the python back
 ```bash
 docker run --env EXTRA_BACKENDS="backend/python/diffusers" quay.io/go-skynet/local-ai:master-ffmpeg-core
-```
\ No newline at end of file
+```
+
+### Concurrent requests
+
+LocalAI supports parallel requests for the backends that support it. For instance, vLLM and llama.cpp support parallel requests, so LocalAI can serve multiple requests in parallel.
+
+To enable parallel requests, pass `--parallel-requests` or set the `PARALLEL_REQUEST` environment variable to true.
+
+The environment variables that tweak parallelism are the following:
+
+```
+### Python backends GRPC max workers
+### Default number of workers for GRPC Python backends.
+### This actually controls whether a backend can process multiple requests or not.
+# PYTHON_GRPC_MAX_WORKERS=1
+
+### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
+# LLAMACPP_PARALLEL=1
+
+### Enable to run parallel requests
+# LOCALAI_PARALLEL_REQUESTS=true
+```
+
+Note that for llama.cpp you need to set `LLAMACPP_PARALLEL` to the number of parallel processes your GPU/CPU can handle. For Python-based backends (like vLLM) you can set `PYTHON_GRPC_MAX_WORKERS` to the number of parallel requests.
+

From c4f958e11b59534c67ce9a69bff1733ab6817d3e Mon Sep 17 00:00:00 2001
From: Dave Date: Mon, 29 Apr 2024 13:42:37 -0400
Subject: [PATCH 0422/2895] refactor(application): introduce application global state (#2072)

* start breaking up the giant channel refactor now that it's better understood - easier to merge bites

Signed-off-by: Dave Lee

* add concurrency and base64 back in, along with new base64 tests.

Signed-off-by: Dave Lee

* Automatic rename of whisper.go's Result to TranscriptResult

Signed-off-by: Dave Lee

* remove pkg/concurrency - significant changes coming in split 2

Signed-off-by: Dave Lee

* fix comments

Signed-off-by: Dave Lee

* add list_model service as another low-risk service to get it out of the way

Signed-off-by: Dave Lee

* split backend config loader into separate file from the actual config struct. No changes yet, just reduce cognitive load with smaller files of logical blocks

Signed-off-by: Dave Lee

* rename state.go ==> application.go

Signed-off-by: Dave Lee

* fix lost import?
Signed-off-by: Dave Lee --------- Signed-off-by: Dave Lee --- backend/go/transcribe/transcript.go | 4 +- backend/go/transcribe/whisper.go | 2 +- core/application.go | 39 +++ core/backend/transcript.go | 2 +- core/config/backend_config.go | 313 +---------------- core/config/backend_config_loader.go | 317 ++++++++++++++++++ core/http/app.go | 23 -- .../http/endpoints/localai/backend_monitor.go | 4 +- core/http/endpoints/openai/list.go | 52 +-- core/http/routes/localai.go | 6 +- core/http/routes/openai.go | 6 +- core/schema/{whisper.go => transcription.go} | 2 +- core/services/backend_monitor.go | 42 +-- core/services/list_models.go | 72 ++++ core/startup/startup.go | 31 ++ pkg/grpc/backend.go | 2 +- pkg/grpc/base/base.go | 4 +- pkg/grpc/client.go | 4 +- pkg/grpc/embed.go | 4 +- pkg/grpc/interface.go | 2 +- pkg/utils/base64.go | 50 +++ pkg/utils/base64_test.go | 31 ++ 22 files changed, 590 insertions(+), 422 deletions(-) create mode 100644 core/application.go create mode 100644 core/config/backend_config_loader.go rename core/schema/{whisper.go => transcription.go} (90%) create mode 100644 core/services/list_models.go create mode 100644 pkg/utils/base64.go create mode 100644 pkg/utils/base64_test.go diff --git a/backend/go/transcribe/transcript.go b/backend/go/transcribe/transcript.go index 74833e4d..256be71f 100644 --- a/backend/go/transcribe/transcript.go +++ b/backend/go/transcribe/transcript.go @@ -29,8 +29,8 @@ func audioToWav(src, dst string) error { return nil } -func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.Result, error) { - res := schema.Result{} +func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.TranscriptionResult, error) { + res := schema.TranscriptionResult{} dir, err := os.MkdirTemp("", "whisper") if err != nil { diff --git a/backend/go/transcribe/whisper.go b/backend/go/transcribe/whisper.go index ac93be01..a9a62d24 100644 --- a/backend/go/transcribe/whisper.go +++ b/backend/go/transcribe/whisper.go @@ -21,6 +21,6 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error { return err } -func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.Result, error) { +func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) { return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads)) } diff --git a/core/application.go b/core/application.go new file mode 100644 index 00000000..54d3dedf --- /dev/null +++ b/core/application.go @@ -0,0 +1,39 @@ +package core + +import ( + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/services" + "github.com/go-skynet/LocalAI/pkg/model" +) + +// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy +// Perhaps a proper DI system is worth it in the future, but for now keep things simple. 
+type Application struct { + + // Application-Level Config + ApplicationConfig *config.ApplicationConfig + // ApplicationState *ApplicationState + + // Core Low-Level Services + BackendConfigLoader *config.BackendConfigLoader + ModelLoader *model.ModelLoader + + // Backend Services + // EmbeddingsBackendService *backend.EmbeddingsBackendService + // ImageGenerationBackendService *backend.ImageGenerationBackendService + // LLMBackendService *backend.LLMBackendService + // TranscriptionBackendService *backend.TranscriptionBackendService + // TextToSpeechBackendService *backend.TextToSpeechBackendService + + // LocalAI System Services + BackendMonitorService *services.BackendMonitorService + GalleryService *services.GalleryService + ListModelsService *services.ListModelsService + LocalAIMetricsService *services.LocalAIMetricsService + // OpenAIService *services.OpenAIService +} + +// TODO [NEXT PR?]: Break up ApplicationConfig. +// Migrate over stuff that is not set via config at all - especially runtime stuff +type ApplicationState struct { +} diff --git a/core/backend/transcript.go b/core/backend/transcript.go index 4c3859df..e620bebd 100644 --- a/core/backend/transcript.go +++ b/core/backend/transcript.go @@ -11,7 +11,7 @@ import ( model "github.com/go-skynet/LocalAI/pkg/model" ) -func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.Result, error) { +func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) { opts := modelOpts(backendConfig, appConfig, []model.Option{ model.WithBackendString(model.WhisperBackend), diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 0d7d0cbf..cb1b7c2a 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -1,23 +1,12 @@ package config import ( - "errors" - "fmt" - "io/fs" "os" - "path/filepath" - "sort" - "strings" - "sync" "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/downloader" "github.com/go-skynet/LocalAI/pkg/functions" "github.com/go-skynet/LocalAI/pkg/utils" - "github.com/rs/zerolog/log" - "gopkg.in/yaml.v3" - - "github.com/charmbracelet/glamour" ) const ( @@ -140,7 +129,7 @@ type LLMConfig struct { EnforceEager bool `yaml:"enforce_eager"` // vLLM SwapSpace int `yaml:"swap_space"` // vLLM MaxModelLen int `yaml:"max_model_len"` // vLLM - TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM + TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM MMProj string `yaml:"mmproj"` RopeScaling string `yaml:"rope_scaling"` @@ -343,303 +332,3 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { cfg.Debug = &trueV } } - -////// Config Loader //////// - -type BackendConfigLoader struct { - configs map[string]BackendConfig - sync.Mutex -} - -type LoadOptions struct { - debug bool - threads, ctxSize int - f16 bool -} - -func LoadOptionDebug(debug bool) ConfigLoaderOption { - return func(o *LoadOptions) { - o.debug = debug - } -} - -func LoadOptionThreads(threads int) ConfigLoaderOption { - return func(o *LoadOptions) { - o.threads = threads - } -} - -func LoadOptionContextSize(ctxSize int) ConfigLoaderOption { - return func(o *LoadOptions) { - o.ctxSize = ctxSize - } -} - -func LoadOptionF16(f16 bool) ConfigLoaderOption { - return func(o *LoadOptions) { - o.f16 = f16 - } -} - -type ConfigLoaderOption func(*LoadOptions) - -func 
(lo *LoadOptions) Apply(options ...ConfigLoaderOption) { - for _, l := range options { - l(lo) - } -} - -// Load a config file for a model -func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) { - - // Load a config file if present after the model name - cfg := &BackendConfig{ - PredictionOptions: schema.PredictionOptions{ - Model: modelName, - }, - } - - cfgExisting, exists := cl.GetBackendConfig(modelName) - if exists { - cfg = &cfgExisting - } else { - // Try loading a model config file - modelConfig := filepath.Join(modelPath, modelName+".yaml") - if _, err := os.Stat(modelConfig); err == nil { - if err := cl.LoadBackendConfig( - modelConfig, opts..., - ); err != nil { - return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error()) - } - cfgExisting, exists = cl.GetBackendConfig(modelName) - if exists { - cfg = &cfgExisting - } - } - } - - cfg.SetDefaults(opts...) - - return cfg, nil -} - -func NewBackendConfigLoader() *BackendConfigLoader { - return &BackendConfigLoader{ - configs: make(map[string]BackendConfig), - } -} -func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) { - c := &[]*BackendConfig{} - f, err := os.ReadFile(file) - if err != nil { - return nil, fmt.Errorf("cannot read config file: %w", err) - } - if err := yaml.Unmarshal(f, c); err != nil { - return nil, fmt.Errorf("cannot unmarshal config file: %w", err) - } - - for _, cc := range *c { - cc.SetDefaults(opts...) - } - - return *c, nil -} - -func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) { - lo := &LoadOptions{} - lo.Apply(opts...) - - c := &BackendConfig{} - f, err := os.ReadFile(file) - if err != nil { - return nil, fmt.Errorf("cannot read config file: %w", err) - } - if err := yaml.Unmarshal(f, c); err != nil { - return nil, fmt.Errorf("cannot unmarshal config file: %w", err) - } - - c.SetDefaults(opts...) - return c, nil -} - -func (cm *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error { - cm.Lock() - defer cm.Unlock() - c, err := ReadBackendConfigFile(file, opts...) - if err != nil { - return fmt.Errorf("cannot load config file: %w", err) - } - - for _, cc := range c { - cm.configs[cc.Name] = *cc - } - return nil -} - -func (cl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error { - cl.Lock() - defer cl.Unlock() - c, err := ReadBackendConfig(file, opts...) 
- if err != nil { - return fmt.Errorf("cannot read config file: %w", err) - } - - cl.configs[c.Name] = *c - return nil -} - -func (cl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) { - cl.Lock() - defer cl.Unlock() - v, exists := cl.configs[m] - return v, exists -} - -func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig { - cl.Lock() - defer cl.Unlock() - var res []BackendConfig - for _, v := range cl.configs { - res = append(res, v) - } - - sort.SliceStable(res, func(i, j int) bool { - return res[i].Name < res[j].Name - }) - - return res -} - -func (cl *BackendConfigLoader) ListBackendConfigs() []string { - cl.Lock() - defer cl.Unlock() - var res []string - for k := range cl.configs { - res = append(res, k) - } - return res -} - -// Preload prepare models if they are not local but url or huggingface repositories -func (cl *BackendConfigLoader) Preload(modelPath string) error { - cl.Lock() - defer cl.Unlock() - - status := func(fileName, current, total string, percent float64) { - utils.DisplayDownloadFunction(fileName, current, total, percent) - } - - log.Info().Msgf("Preloading models from %s", modelPath) - - renderMode := "dark" - if os.Getenv("COLOR") != "" { - renderMode = os.Getenv("COLOR") - } - - glamText := func(t string) { - out, err := glamour.Render(t, renderMode) - if err == nil && os.Getenv("NO_COLOR") == "" { - fmt.Println(out) - } else { - fmt.Println(t) - } - } - - for i, config := range cl.configs { - - // Download files and verify their SHA - for i, file := range config.DownloadFiles { - log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename) - - if err := utils.VerifyPath(file.Filename, modelPath); err != nil { - return err - } - // Create file path - filePath := filepath.Join(modelPath, file.Filename) - - if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil { - return err - } - } - - // If the model is an URL, expand it, and download the file - if config.IsModelURL() { - modelFileName := config.ModelFileName() - modelURL := downloader.ConvertURL(config.Model) - // check if file exists - if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) { - err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status) - if err != nil { - return err - } - } - - cc := cl.configs[i] - c := &cc - c.PredictionOptions.Model = modelFileName - cl.configs[i] = *c - } - - if config.IsMMProjURL() { - modelFileName := config.MMProjFileName() - modelURL := downloader.ConvertURL(config.MMProj) - // check if file exists - if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) { - err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status) - if err != nil { - return err - } - } - - cc := cl.configs[i] - c := &cc - c.MMProj = modelFileName - cl.configs[i] = *c - } - - if cl.configs[i].Name != "" { - glamText(fmt.Sprintf("**Model name**: _%s_", cl.configs[i].Name)) - } - if cl.configs[i].Description != "" { - //glamText("**Description**") - glamText(cl.configs[i].Description) - } - if cl.configs[i].Usage != "" { - //glamText("**Usage**") - glamText(cl.configs[i].Usage) - } - } - return nil -} - -// LoadBackendConfigsFromPath reads all the configurations of the models from a path -// (non-recursive) -func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error { - cm.Lock() - defer 
cm.Unlock() - entries, err := os.ReadDir(path) - if err != nil { - return err - } - files := make([]fs.FileInfo, 0, len(entries)) - for _, entry := range entries { - info, err := entry.Info() - if err != nil { - return err - } - files = append(files, info) - } - for _, file := range files { - // Skip templates, YAML and .keep files - if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") || - strings.HasPrefix(file.Name(), ".") { - continue - } - c, err := ReadBackendConfig(filepath.Join(path, file.Name()), opts...) - if err == nil { - cm.configs[c.Name] = *c - } - } - - return nil -} diff --git a/core/config/backend_config_loader.go b/core/config/backend_config_loader.go new file mode 100644 index 00000000..83b66740 --- /dev/null +++ b/core/config/backend_config_loader.go @@ -0,0 +1,317 @@ +package config + +import ( + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + "sort" + "strings" + "sync" + + "github.com/charmbracelet/glamour" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/downloader" + "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/rs/zerolog/log" + "gopkg.in/yaml.v3" +) + +type BackendConfigLoader struct { + configs map[string]BackendConfig + sync.Mutex +} + +type LoadOptions struct { + debug bool + threads, ctxSize int + f16 bool +} + +func LoadOptionDebug(debug bool) ConfigLoaderOption { + return func(o *LoadOptions) { + o.debug = debug + } +} + +func LoadOptionThreads(threads int) ConfigLoaderOption { + return func(o *LoadOptions) { + o.threads = threads + } +} + +func LoadOptionContextSize(ctxSize int) ConfigLoaderOption { + return func(o *LoadOptions) { + o.ctxSize = ctxSize + } +} + +func LoadOptionF16(f16 bool) ConfigLoaderOption { + return func(o *LoadOptions) { + o.f16 = f16 + } +} + +type ConfigLoaderOption func(*LoadOptions) + +func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) { + for _, l := range options { + l(lo) + } +} + +// Load a config file for a model +func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) { + + // Load a config file if present after the model name + cfg := &BackendConfig{ + PredictionOptions: schema.PredictionOptions{ + Model: modelName, + }, + } + + cfgExisting, exists := cl.GetBackendConfig(modelName) + if exists { + cfg = &cfgExisting + } else { + // Try loading a model config file + modelConfig := filepath.Join(modelPath, modelName+".yaml") + if _, err := os.Stat(modelConfig); err == nil { + if err := cl.LoadBackendConfig( + modelConfig, opts..., + ); err != nil { + return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error()) + } + cfgExisting, exists = cl.GetBackendConfig(modelName) + if exists { + cfg = &cfgExisting + } + } + } + + cfg.SetDefaults(opts...) + + return cfg, nil +} + +func NewBackendConfigLoader() *BackendConfigLoader { + return &BackendConfigLoader{ + configs: make(map[string]BackendConfig), + } +} +func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) { + c := &[]*BackendConfig{} + f, err := os.ReadFile(file) + if err != nil { + return nil, fmt.Errorf("cannot read config file: %w", err) + } + if err := yaml.Unmarshal(f, c); err != nil { + return nil, fmt.Errorf("cannot unmarshal config file: %w", err) + } + + for _, cc := range *c { + cc.SetDefaults(opts...) 
+ } + + return *c, nil +} + +func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) { + lo := &LoadOptions{} + lo.Apply(opts...) + + c := &BackendConfig{} + f, err := os.ReadFile(file) + if err != nil { + return nil, fmt.Errorf("cannot read config file: %w", err) + } + if err := yaml.Unmarshal(f, c); err != nil { + return nil, fmt.Errorf("cannot unmarshal config file: %w", err) + } + + c.SetDefaults(opts...) + return c, nil +} + +func (cm *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error { + cm.Lock() + defer cm.Unlock() + c, err := ReadBackendConfigFile(file, opts...) + if err != nil { + return fmt.Errorf("cannot load config file: %w", err) + } + + for _, cc := range c { + cm.configs[cc.Name] = *cc + } + return nil +} + +func (cl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error { + cl.Lock() + defer cl.Unlock() + c, err := ReadBackendConfig(file, opts...) + if err != nil { + return fmt.Errorf("cannot read config file: %w", err) + } + + cl.configs[c.Name] = *c + return nil +} + +func (cl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) { + cl.Lock() + defer cl.Unlock() + v, exists := cl.configs[m] + return v, exists +} + +func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig { + cl.Lock() + defer cl.Unlock() + var res []BackendConfig + for _, v := range cl.configs { + res = append(res, v) + } + + sort.SliceStable(res, func(i, j int) bool { + return res[i].Name < res[j].Name + }) + + return res +} + +func (cl *BackendConfigLoader) ListBackendConfigs() []string { + cl.Lock() + defer cl.Unlock() + var res []string + for k := range cl.configs { + res = append(res, k) + } + return res +} + +// Preload prepare models if they are not local but url or huggingface repositories +func (cl *BackendConfigLoader) Preload(modelPath string) error { + cl.Lock() + defer cl.Unlock() + + status := func(fileName, current, total string, percent float64) { + utils.DisplayDownloadFunction(fileName, current, total, percent) + } + + log.Info().Msgf("Preloading models from %s", modelPath) + + renderMode := "dark" + if os.Getenv("COLOR") != "" { + renderMode = os.Getenv("COLOR") + } + + glamText := func(t string) { + out, err := glamour.Render(t, renderMode) + if err == nil && os.Getenv("NO_COLOR") == "" { + fmt.Println(out) + } else { + fmt.Println(t) + } + } + + for i, config := range cl.configs { + + // Download files and verify their SHA + for i, file := range config.DownloadFiles { + log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename) + + if err := utils.VerifyPath(file.Filename, modelPath); err != nil { + return err + } + // Create file path + filePath := filepath.Join(modelPath, file.Filename) + + if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil { + return err + } + } + + // If the model is an URL, expand it, and download the file + if config.IsModelURL() { + modelFileName := config.ModelFileName() + modelURL := downloader.ConvertURL(config.Model) + // check if file exists + if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) { + err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status) + if err != nil { + return err + } + } + + cc := cl.configs[i] + c := &cc + c.PredictionOptions.Model = modelFileName + cl.configs[i] = *c + } + + if config.IsMMProjURL() { + modelFileName := 
config.MMProjFileName() + modelURL := downloader.ConvertURL(config.MMProj) + // check if file exists + if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) { + err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status) + if err != nil { + return err + } + } + + cc := cl.configs[i] + c := &cc + c.MMProj = modelFileName + cl.configs[i] = *c + } + + if cl.configs[i].Name != "" { + glamText(fmt.Sprintf("**Model name**: _%s_", cl.configs[i].Name)) + } + if cl.configs[i].Description != "" { + //glamText("**Description**") + glamText(cl.configs[i].Description) + } + if cl.configs[i].Usage != "" { + //glamText("**Usage**") + glamText(cl.configs[i].Usage) + } + } + return nil +} + +// LoadBackendConfigsFromPath reads all the configurations of the models from a path +// (non-recursive) +func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error { + cm.Lock() + defer cm.Unlock() + entries, err := os.ReadDir(path) + if err != nil { + return err + } + files := make([]fs.FileInfo, 0, len(entries)) + for _, entry := range entries { + info, err := entry.Info() + if err != nil { + return err + } + files = append(files, info) + } + for _, file := range files { + // Keep only YAML configs: skip templates, hidden files such as .keep, and anything that is not .yaml/.yml + if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") || + strings.HasPrefix(file.Name(), ".") { + continue + } + c, err := ReadBackendConfig(filepath.Join(path, file.Name()), opts...) + if err == nil { + cm.configs[c.Name] = *c + } + } + + return nil +} diff --git a/core/http/app.go b/core/http/app.go index bd740410..080535a4 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -1,9 +1,7 @@ package http import ( - "encoding/json" "errors" - "os" "strings" "github.com/go-skynet/LocalAI/pkg/utils" @@ -124,20 +122,6 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi return c.Next() } - // Check for api_keys.json file - fileContent, err := os.ReadFile("api_keys.json") - if err == nil { - // Parse JSON content from the file - var fileKeys []string - err := json.Unmarshal(fileContent, &fileKeys) - if err != nil { - return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"}) - } - - // Add file keys to options.ApiKeys - appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...)
- } - if len(appConfig.ApiKeys) == 0 { return c.Next() } @@ -174,13 +158,6 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi app.Use(c) } - // Make sure directories exists - os.MkdirAll(appConfig.ImageDir, 0750) - os.MkdirAll(appConfig.AudioDir, 0750) - os.MkdirAll(appConfig.UploadDir, 0750) - os.MkdirAll(appConfig.ConfigsDir, 0750) - os.MkdirAll(appConfig.ModelPath, 0750) - // Load config jsons utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles) utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) diff --git a/core/http/endpoints/localai/backend_monitor.go b/core/http/endpoints/localai/backend_monitor.go index 8c7a664a..dac20388 100644 --- a/core/http/endpoints/localai/backend_monitor.go +++ b/core/http/endpoints/localai/backend_monitor.go @@ -6,7 +6,7 @@ import ( "github.com/gofiber/fiber/v2" ) -func BackendMonitorEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error { +func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { input := new(schema.BackendMonitorRequest) @@ -23,7 +23,7 @@ func BackendMonitorEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error } } -func BackendShutdownEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error { +func BackendShutdownEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { input := new(schema.BackendMonitorRequest) // Get input data from the request body diff --git a/core/http/endpoints/openai/list.go b/core/http/endpoints/openai/list.go index 04e611a2..2caea96b 100644 --- a/core/http/endpoints/openai/list.go +++ b/core/http/endpoints/openai/list.go @@ -1,63 +1,23 @@ package openai import ( - "regexp" - - "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" - model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/core/services" "github.com/gofiber/fiber/v2" ) -func ListModelsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error { +func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error { return func(c *fiber.Ctx) error { - models, err := ml.ListModels() - if err != nil { - return err - } - var mm map[string]interface{} = map[string]interface{}{} - - dataModels := []schema.OpenAIModel{} - - var filterFn func(name string) bool + // If blank, no filter is applied. filter := c.Query("filter") - // If filter is not specified, do not filter the list by model name - if filter == "" { - filterFn = func(_ string) bool { return true } - } else { - // If filter _IS_ specified, we compile it to a regex which is used to create the filterFn - rxp, err := regexp.Compile(filter) - if err != nil { - return err - } - filterFn = func(name string) bool { - return rxp.MatchString(name) - } - } - // By default, exclude any loose files that are already referenced by a configuration file. 
excludeConfigured := c.QueryBool("excludeConfigured", true) - // Start with the known configurations - for _, c := range cl.GetAllBackendConfigs() { - if excludeConfigured { - mm[c.Model] = nil - } - - if filterFn(c.Name) { - dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"}) - } + dataModels, err := lms.ListModels(filter, excludeConfigured) + if err != nil { + return err } - - // Then iterate through the loose files: - for _, m := range models { - // And only adds them if they shouldn't be skipped. - if _, exists := mm[m]; !exists && filterFn(m) { - dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"}) - } - } - return c.JSON(struct { Object string `json:"object"` Data []schema.OpenAIModel `json:"data"` diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index 138babbe..a5099d60 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -52,9 +52,9 @@ func RegisterLocalAIRoutes(app *fiber.App, app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint()) // Experimental Backend Statistics Module - backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now - app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor)) - app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor)) + backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now + app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitorService)) + app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitorService)) app.Get("/version", auth, func(c *fiber.Ctx) error { return c.JSON(struct { diff --git a/core/http/routes/openai.go b/core/http/routes/openai.go index c51ccdcb..74f20175 100644 --- a/core/http/routes/openai.go +++ b/core/http/routes/openai.go @@ -4,6 +4,7 @@ import ( "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/http/endpoints/localai" "github.com/go-skynet/LocalAI/core/http/endpoints/openai" + "github.com/go-skynet/LocalAI/core/services" "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" ) @@ -81,6 +82,7 @@ func RegisterOpenAIRoutes(app *fiber.App, } // models - app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml)) - app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml)) + tmpLMS := services.NewListModelsService(ml, cl, appConfig) // TODO: once createApplication() is fully in use, reference the central instance. 
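+ // For illustration only: the handlers below wrap a single service call such as dataModels, err := tmpLMS.ListModels("llama3.*", true),
+ // where the filter string is compiled to a regex and excludeConfigured hides loose model files already referenced by a config.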
+ app.Get("/v1/models", auth, openai.ListModelsEndpoint(tmpLMS)) + app.Get("/models", auth, openai.ListModelsEndpoint(tmpLMS)) } diff --git a/core/schema/whisper.go b/core/schema/transcription.go similarity index 90% rename from core/schema/whisper.go rename to core/schema/transcription.go index 41413c1f..fe1799fa 100644 --- a/core/schema/whisper.go +++ b/core/schema/transcription.go @@ -10,7 +10,7 @@ type Segment struct { Tokens []int `json:"tokens"` } -type Result struct { +type TranscriptionResult struct { Segments []Segment `json:"segments"` Text string `json:"text"` } diff --git a/core/services/backend_monitor.go b/core/services/backend_monitor.go index 979a67a3..4e993ed9 100644 --- a/core/services/backend_monitor.go +++ b/core/services/backend_monitor.go @@ -15,22 +15,22 @@ import ( gopsutil "github.com/shirou/gopsutil/v3/process" ) -type BackendMonitor struct { - configLoader *config.BackendConfigLoader - modelLoader *model.ModelLoader - options *config.ApplicationConfig // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name. +type BackendMonitorService struct { + backendConfigLoader *config.BackendConfigLoader + modelLoader *model.ModelLoader + options *config.ApplicationConfig // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name. } -func NewBackendMonitor(configLoader *config.BackendConfigLoader, modelLoader *model.ModelLoader, appConfig *config.ApplicationConfig) BackendMonitor { - return BackendMonitor{ - configLoader: configLoader, - modelLoader: modelLoader, - options: appConfig, +func NewBackendMonitorService(modelLoader *model.ModelLoader, configLoader *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *BackendMonitorService { + return &BackendMonitorService{ + modelLoader: modelLoader, + backendConfigLoader: configLoader, + options: appConfig, } } -func (bm BackendMonitor) getModelLoaderIDFromModelName(modelName string) (string, error) { - config, exists := bm.configLoader.GetBackendConfig(modelName) +func (bms BackendMonitorService) getModelLoaderIDFromModelName(modelName string) (string, error) { + config, exists := bms.backendConfigLoader.GetBackendConfig(modelName) var backendId string if exists { backendId = config.Model @@ -46,8 +46,8 @@ func (bm BackendMonitor) getModelLoaderIDFromModelName(modelName string) (string return backendId, nil } -func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) { - config, exists := bm.configLoader.GetBackendConfig(model) +func (bms *BackendMonitorService) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) { + config, exists := bms.backendConfigLoader.GetBackendConfig(model) var backend string if exists { backend = config.Model @@ -60,7 +60,7 @@ func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.Backe backend = fmt.Sprintf("%s.bin", backend) } - pid, err := bm.modelLoader.GetGRPCPID(backend) + pid, err := bms.modelLoader.GetGRPCPID(backend) if err != nil { log.Error().Err(err).Str("model", model).Msg("failed to find GRPC pid") @@ -101,12 +101,12 @@ func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.Backe }, nil } -func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse, error) { - backendId, err := bm.getModelLoaderIDFromModelName(modelName) +func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.StatusResponse, error) { + 
backendId, err := bms.getModelLoaderIDFromModelName(modelName) if err != nil { return nil, err } - modelAddr := bm.modelLoader.CheckIsLoaded(backendId) + modelAddr := bms.modelLoader.CheckIsLoaded(backendId) if modelAddr == "" { return nil, fmt.Errorf("backend %s is not currently loaded", backendId) } @@ -114,7 +114,7 @@ func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse status, rpcErr := modelAddr.GRPC(false, nil).Status(context.TODO()) if rpcErr != nil { log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error()) - val, slbErr := bm.SampleLocalBackendProcess(backendId) + val, slbErr := bms.SampleLocalBackendProcess(backendId) if slbErr != nil { return nil, fmt.Errorf("backend %s experienced an error retrieving status info via rpc: %s, then failed local node process sample: %s", backendId, rpcErr.Error(), slbErr.Error()) } @@ -131,10 +131,10 @@ func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse return status, nil } -func (bm BackendMonitor) ShutdownModel(modelName string) error { - backendId, err := bm.getModelLoaderIDFromModelName(modelName) +func (bms BackendMonitorService) ShutdownModel(modelName string) error { + backendId, err := bms.getModelLoaderIDFromModelName(modelName) if err != nil { return err } - return bm.modelLoader.ShutdownModel(backendId) + return bms.modelLoader.ShutdownModel(backendId) } diff --git a/core/services/list_models.go b/core/services/list_models.go new file mode 100644 index 00000000..a21e6faf --- /dev/null +++ b/core/services/list_models.go @@ -0,0 +1,72 @@ +package services + +import ( + "regexp" + + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/model" +) + +type ListModelsService struct { + bcl *config.BackendConfigLoader + ml *model.ModelLoader + appConfig *config.ApplicationConfig +} + +func NewListModelsService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *ListModelsService { + return &ListModelsService{ + bcl: bcl, + ml: ml, + appConfig: appConfig, + } +} + +func (lms *ListModelsService) ListModels(filter string, excludeConfigured bool) ([]schema.OpenAIModel, error) { + + models, err := lms.ml.ListModels() + if err != nil { + return nil, err + } + + var mm map[string]interface{} = map[string]interface{}{} + + dataModels := []schema.OpenAIModel{} + + var filterFn func(name string) bool + + // If filter is not specified, do not filter the list by model name + if filter == "" { + filterFn = func(_ string) bool { return true } + } else { + // If filter _IS_ specified, we compile it to a regex which is used to create the filterFn + rxp, err := regexp.Compile(filter) + if err != nil { + return nil, err + } + filterFn = func(name string) bool { + return rxp.MatchString(name) + } + } + + // Start with the known configurations + for _, c := range lms.bcl.GetAllBackendConfigs() { + if excludeConfigured { + mm[c.Model] = nil + } + + if filterFn(c.Name) { + dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"}) + } + } + + // Then iterate through the loose files: + for _, m := range models { + // And only adds them if they shouldn't be skipped. 
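+ // (i.e. a loose file is listed only when no configuration already claims it and the filter accepts its name)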
+ if _, exists := mm[m]; !exists && filterFn(m) { + dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"}) + } + } + + return dataModels, nil +} diff --git a/core/startup/startup.go b/core/startup/startup.go index e5660f4c..672aee15 100644 --- a/core/startup/startup.go +++ b/core/startup/startup.go @@ -4,6 +4,7 @@ import ( "fmt" "os" + "github.com/go-skynet/LocalAI/core" "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/services" "github.com/go-skynet/LocalAI/internal" @@ -133,3 +134,33 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode log.Info().Msg("core/startup process completed!") return cl, ml, options, nil } + +// In Lieu of a proper DI framework, this function wires up the Application manually. +// This is in core/startup rather than core/state.go to keep package references clean! +func createApplication(appConfig *config.ApplicationConfig) *core.Application { + app := &core.Application{ + ApplicationConfig: appConfig, + BackendConfigLoader: config.NewBackendConfigLoader(), + ModelLoader: model.NewModelLoader(appConfig.ModelPath), + } + + var err error + + // app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + // app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + // app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + // app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + // app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + + app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + app.GalleryService = services.NewGalleryService(app.ApplicationConfig.ModelPath) + app.ListModelsService = services.NewListModelsService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + // app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService) + + app.LocalAIMetricsService, err = services.NewLocalAIMetricsService() + if err != nil { + log.Error().Err(err).Msg("encountered an error initializing metrics service, startup will continue but metrics will not be tracked.") + } + + return app +} diff --git a/pkg/grpc/backend.go b/pkg/grpc/backend.go index bef9e186..b5745db5 100644 --- a/pkg/grpc/backend.go +++ b/pkg/grpc/backend.go @@ -41,7 +41,7 @@ type Backend interface { PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) - AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) + AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error) Status(ctx context.Context) (*pb.StatusResponse, error) diff --git a/pkg/grpc/base/base.go b/pkg/grpc/base/base.go index 
0af5d94f..c0b4bc34 100644 --- a/pkg/grpc/base/base.go +++ b/pkg/grpc/base/base.go @@ -53,8 +53,8 @@ func (llm *Base) GenerateImage(*pb.GenerateImageRequest) error { return fmt.Errorf("unimplemented") } -func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.Result, error) { - return schema.Result{}, fmt.Errorf("unimplemented") +func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error) { + return schema.TranscriptionResult{}, fmt.Errorf("unimplemented") } func (llm *Base) TTS(*pb.TTSRequest) error { diff --git a/pkg/grpc/client.go b/pkg/grpc/client.go index fc4a12fa..06ccc1b4 100644 --- a/pkg/grpc/client.go +++ b/pkg/grpc/client.go @@ -210,7 +210,7 @@ func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOp return client.TTS(ctx, in, opts...) } -func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) { +func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) { if !c.parallel { c.opMutex.Lock() defer c.opMutex.Unlock() @@ -231,7 +231,7 @@ func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptReques if err != nil { return nil, err } - tresult := &schema.Result{} + tresult := &schema.TranscriptionResult{} for _, s := range res.Segments { tks := []int{} for _, t := range s.Tokens { diff --git a/pkg/grpc/embed.go b/pkg/grpc/embed.go index 694e83b0..d2038759 100644 --- a/pkg/grpc/embed.go +++ b/pkg/grpc/embed.go @@ -53,12 +53,12 @@ func (e *embedBackend) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc. return e.s.TTS(ctx, in) } -func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) { +func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) { r, err := e.s.AudioTranscription(ctx, in) if err != nil { return nil, err } - tr := &schema.Result{} + tr := &schema.TranscriptionResult{} for _, s := range r.Segments { var tks []int for _, t := range s.Tokens { diff --git a/pkg/grpc/interface.go b/pkg/grpc/interface.go index 4d06544d..aa7a3fbc 100644 --- a/pkg/grpc/interface.go +++ b/pkg/grpc/interface.go @@ -15,7 +15,7 @@ type LLM interface { Load(*pb.ModelOptions) error Embeddings(*pb.PredictOptions) ([]float32, error) GenerateImage(*pb.GenerateImageRequest) error - AudioTranscription(*pb.TranscriptRequest) (schema.Result, error) + AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error) TTS(*pb.TTSRequest) error TokenizeString(*pb.PredictOptions) (pb.TokenizationResponse, error) Status() (pb.StatusResponse, error) diff --git a/pkg/utils/base64.go b/pkg/utils/base64.go new file mode 100644 index 00000000..977156e9 --- /dev/null +++ b/pkg/utils/base64.go @@ -0,0 +1,50 @@ +package utils + +import ( + "encoding/base64" + "fmt" + "io" + "net/http" + "strings" + "time" +) + +var base64DownloadClient http.Client = http.Client{ + Timeout: 30 * time.Second, +} + +// this function checks if the string is a URL; if it is, it downloads the image into memory, +// encodes it in base64 and returns the base64 string + +// This may look weird down in pkg/utils while it is currently only used in core/config +// +// but I believe it may be useful for MQTT as well in the near future, so I'm +// extracting it while I'm thinking of it.
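+//
+// Note: only http(s) URLs and strings already carrying the "data:image/jpeg;base64," prefix are accepted; anything else returns an error. Illustrative call: b64, err := GetImageURLAsBase64("https://example.com/image.jpg")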
+func GetImageURLAsBase64(s string) (string, error) { + if strings.HasPrefix(s, "http") { + // download the image + resp, err := base64DownloadClient.Get(s) + if err != nil { + return "", err + } + defer resp.Body.Close() + + // read the image data into memory + data, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + + // encode the image data in base64 + encoded := base64.StdEncoding.EncodeToString(data) + + // return the base64 string + return encoded, nil + } + + // if the string instead is prefixed with "data:image/jpeg;base64,", drop it + if strings.HasPrefix(s, "data:image/jpeg;base64,") { + return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil + } + return "", fmt.Errorf("not valid string") +} \ No newline at end of file diff --git a/pkg/utils/base64_test.go b/pkg/utils/base64_test.go new file mode 100644 index 00000000..28a09d17 --- /dev/null +++ b/pkg/utils/base64_test.go @@ -0,0 +1,31 @@ +package utils_test + +import ( + . "github.com/go-skynet/LocalAI/pkg/utils" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("utils/base64 tests", func() { + It("GetImageURLAsBase64 can strip data url prefixes", func() { + // This one doesn't actually _care_ that it's base64, so feed "bad" data in this test in order to catch a change in that behavior for informational purposes. + input := "data:image/jpeg;base64,FOO" + b64, err := GetImageURLAsBase64(input) + Expect(err).To(BeNil()) + Expect(b64).To(Equal("FOO")) + }) + It("GetImageURLAsBase64 returns an error for bogus data", func() { + input := "FOO" + b64, err := GetImageURLAsBase64(input) + Expect(b64).To(Equal("")) + Expect(err).ToNot(BeNil()) + Expect(err).To(MatchError("not valid string")) + }) + It("GetImageURLAsBase64 can actually download images and calculates something", func() { + // This test doesn't actually _check_ the results at this time, which is bad, but there wasn't a test at all before... + input := "https://upload.wikimedia.org/wikipedia/en/2/29/Wargames.jpg" + b64, err := GetImageURLAsBase64(input) + Expect(err).To(BeNil()) + Expect(b64).ToNot(BeNil()) + }) +}) From 53c3842bc238ef74a24329ac7d8f7d68bfd4c7c9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 21:12:37 +0000 Subject: [PATCH 0423/2895] build(deps): bump dependabot/fetch-metadata from 2.0.0 to 2.1.0 (#2186) Bumps [dependabot/fetch-metadata](https://github.com/dependabot/fetch-metadata) from 2.0.0 to 2.1.0. - [Release notes](https://github.com/dependabot/fetch-metadata/releases) - [Commits](https://github.com/dependabot/fetch-metadata/compare/v2.0.0...v2.1.0) --- updated-dependencies: - dependency-name: dependabot/fetch-metadata dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/dependabot_auto.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml index 8e32aee1..c5b8f4aa 100644 --- a/.github/workflows/dependabot_auto.yml +++ b/.github/workflows/dependabot_auto.yml @@ -14,7 +14,7 @@ jobs: steps: - name: Dependabot metadata id: metadata - uses: dependabot/fetch-metadata@v2.0.0 + uses: dependabot/fetch-metadata@v2.1.0 with: github-token: "${{ secrets.GITHUB_TOKEN }}" skip-commit-verification: true From 52a268c38cf5c9efe2f7a2db852ce163382f44f2 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 29 Apr 2024 23:36:30 +0200 Subject: [PATCH 0424/2895] :arrow_up: Update ggerganov/llama.cpp (#2189) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0069fb8b..0f26e6ee 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=7bb36ccf91b8a2e92b182dd75624f1fd7cb205ac +CPPLLAMA_VERSION?=b8c1476e44cc1f3a1811613f65251cf779067636 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 5fd46175dcb0698ac4f480fd278374e367517389 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Mon, 29 Apr 2024 16:40:50 -0500 Subject: [PATCH 0425/2895] fix: ensure GNUMake jobserver is passed through to whisper.cpp build (#2187) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0f26e6ee..8b44374a 100644 --- a/Makefile +++ b/Makefile @@ -240,7 +240,7 @@ sources/whisper.cpp: cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp - cd sources/whisper.cpp && make libwhisper.a + cd sources/whisper.cpp && $(MAKE) libwhisper.a get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream From 29d7812344fe3b2501817215adda42a53790c876 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 30 Apr 2024 00:16:04 +0200 Subject: [PATCH 0426/2895] :arrow_up: Update ggerganov/whisper.cpp (#2188) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8b44374a..0096d3f2 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=22b6598cc9f1454567efa0d816fdc57637243999 +WHISPER_CPP_VERSION?=8fac6455ffeb0a0950a84e790ddb74f7290d33c4 # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From 3754f154eea8c246e2afb24f8c00d90f3f6b45e6 Mon Sep 17 00:00:00 2001 From: cryptk 
<421501+cryptk@users.noreply.github.com> Date: Tue, 30 Apr 2024 03:12:19 -0500 Subject: [PATCH 0427/2895] feat: organize Dockerfile into distinct sections (#2181) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- Dockerfile | 124 ++++++++++++++++++++++++----------------------------- 1 file changed, 55 insertions(+), 69 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1c4e24fc..4f2a73c2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,19 +2,15 @@ ARG IMAGE_TYPE=extras ARG BASE_IMAGE=ubuntu:22.04 ARG GRPC_BASE_IMAGE=${BASE_IMAGE} -# extras or core +# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it. FROM ${BASE_IMAGE} AS requirements-core USER root ARG GO_VERSION=1.21.7 -ARG BUILD_TYPE -ARG CUDA_MAJOR_VERSION=11 -ARG CUDA_MINOR_VERSION=7 ARG TARGETARCH ARG TARGETVARIANT -ENV BUILD_TYPE=${BUILD_TYPE} ENV DEBIAN_FRONTEND=noninteractive ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" @@ -22,12 +18,17 @@ ARG GO_TAGS="stablediffusion tinydream tts" RUN apt-get update && \ apt-get install -y --no-install-recommends \ + build-essential \ ca-certificates \ + cmake \ curl \ + git \ python3-pip \ + python-is-python3 \ unzip && \ apt-get clean && \ - rm -rf /var/lib/apt/lists/* + rm -rf /var/lib/apt/lists/* && \ + pip install --upgrade pip # Install Go RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz @@ -47,25 +48,6 @@ RUN update-ca-certificates RUN echo "Target Architecture: $TARGETARCH" RUN echo "Target Variant: $TARGETVARIANT" -# CuBLAS requirements -RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ - apt-get update && \ - apt-get install -y --no-install-recommends \ - software-properties-common && \ - curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \ - dpkg -i cuda-keyring_1.1-1_all.deb && \ - rm -f cuda-keyring_1.1-1_all.deb && \ - apt-get update && \ - apt-get install -y --no-install-recommends \ - cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ - libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ - libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ - libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ - libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* \ - ; fi - # Cuda ENV PATH /usr/local/cuda/bin:${PATH} @@ -91,6 +73,7 @@ RUN test -n "$TARGETARCH" \ ################################### ################################### +# The requirements-extras target is for any builds with IMAGE_TYPE=extras. 
Nothing should be placed in this target unless every IMAGE_TYPE=extras build will use it. FROM requirements-core AS requirements-extras RUN apt-get update && \ @@ -107,12 +90,6 @@ RUN apt-get update && \ rm -rf /var/lib/apt/lists/* ENV PATH="/root/.cargo/bin:${PATH}" -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - python3-pip && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* && \ - pip install --upgrade pip RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y RUN apt-get update && \ @@ -122,13 +99,52 @@ RUN apt-get update && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN if [ ! -e /usr/bin/python ]; then \ - ln -s /usr/bin/python3 /usr/bin/python \ +################################### +################################### + +# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here. +# This target will be built on top of requirements-core or requirements-extras as determined by the IMAGE_TYPE build-arg +FROM requirements-${IMAGE_TYPE} AS requirements-drivers + +ARG BUILD_TYPE +ARG CUDA_MAJOR_VERSION=11 +ARG CUDA_MINOR_VERSION=7 + +ENV BUILD_TYPE=${BUILD_TYPE} + +# CuBLAS requirements +RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + software-properties-common && \ + curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \ + dpkg -i cuda-keyring_1.1-1_all.deb && \ + rm -f cuda-keyring_1.1-1_all.deb && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ + libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ + libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ + libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ + libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* \ + ; fi + +# If we are building with clblas support, we need the libraries for the builds +RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + libclblast-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* \ + ; fi ################################### ################################### + +# The grpc target does one thing: it builds and installs GRPC. This is in its own layer so that it can be effectively cached by CI. +# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work. FROM ${GRPC_BASE_IMAGE} AS grpc # This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall ################################### ################################### -FROM requirements-${IMAGE_TYPE} AS builder +# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry. +# Adjustments to the build process should likely be made here. +FROM requirements-drivers AS builder ARG GO_TAGS="stablediffusion tts" ARG GRPC_BACKENDS @@ -181,25 +199,8 @@ COPY . . COPY .git .
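# (the builder stage continues below with make prepare and the protoc setup needed to generate the stablediffusion gRPC code)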
RUN echo "GO_TAGS: $GO_TAGS" -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - build-essential \ - cmake \ - git && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - RUN make prepare -# If we are building with clblas support, we need the libraries for the builds -RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \ - apt-get update && \ - apt-get install -y --no-install-recommends \ - libclblast-dev && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* \ - ; fi - # We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below # but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only # here so that we can generate the grpc code for the stablediffusion build @@ -225,7 +226,9 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \ ################################### ################################### -FROM requirements-${IMAGE_TYPE} +# This is the final target. The result of this target will be the image uploaded to the registry. +# If you cannot find a more suitable place for an addition, this layer is a suitable place for it. +FROM requirements-drivers ARG FFMPEG ARG BUILD_TYPE @@ -253,23 +256,6 @@ RUN if [ "${FFMPEG}" = "true" ]; then \ rm -rf /var/lib/apt/lists/* \ ; fi -# Add OpenCL -RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \ - apt-get update && \ - apt-get install -y --no-install-recommends \ - libclblast1 && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* \ - ; fi - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - build-essential \ - cmake \ - git && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - WORKDIR /build # we start fresh & re-copy all assets because `make build` does not clean up nicely after itself From e38610e5215508ea7399f3dd6307bd43fc9a585e Mon Sep 17 00:00:00 2001 From: fakezeta Date: Tue, 30 Apr 2024 10:13:04 +0200 Subject: [PATCH 0428/2895] feat: OpenVINO acceleration for embeddings in transformer backend (#2190) OpenVINO acceleration for embeddings New argument type: OVModelForFeatureExtraction --- .../python/transformers/transformers_server.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index a27c24da..93b2ce25 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -153,6 +153,21 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"}, device=device_map) self.OV = True + elif request.Type == "OVModelForFeatureExtraction": + from optimum.intel.openvino import OVModelForFeatureExtraction + from openvino.runtime import Core + + if "GPU" in Core().available_devices: + device_map="GPU" + else: + device_map="CPU" + self.model = OVModelForFeatureExtraction.from_pretrained(model_name, + compile=True, + trust_remote_code=request.TrustRemoteCode, + ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"}, + export=True, + device=device_map) + self.OV = True else: self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode, From f7aabf1b504a6f6c471f574933144821b04ccf64 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Tue, 30 Apr 2024 14:12:15 -0500 Subject: [PATCH 0429/2895] fix: bring everything onto the same GRPC version to fix tests (#2199) fix: 
more places where we are installing grpc that need a version specified fix: attempt to fix metal tests fix: metal/brew is forcing an update, they don't have 1.58 available anymore Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- .github/workflows/generate_grpc_cache.yaml | 2 +- .github/workflows/image_build.yml | 2 +- .github/workflows/release.yaml | 2 +- .github/workflows/test-extra.yml | 22 +++++++++---------- .github/workflows/test.yml | 4 ++-- backend/python/autogptq/autogptq.yml | 2 +- .../transformers/transformers-nvidia.yml | 2 +- .../transformers/transformers-rocm.yml | 2 +- .../common-env/transformers/transformers.yml | 2 +- backend/python/diffusers/diffusers-rocm.yml | 2 +- backend/python/diffusers/diffusers.yml | 2 +- backend/python/diffusers/install.sh | 4 ++-- backend/python/exllama/exllama.yml | 2 +- backend/python/exllama2/exllama2.yml | 2 +- backend/python/parler-tts/parler-nvidia.yml | 2 +- backend/python/parler-tts/parler.yml | 2 +- backend/python/vall-e-x/ttsvalle.yml | 2 +- 17 files changed, 29 insertions(+), 29 deletions(-) diff --git a/.github/workflows/generate_grpc_cache.yaml b/.github/workflows/generate_grpc_cache.yaml index deda6084..b52a137c 100644 --- a/.github/workflows/generate_grpc_cache.yaml +++ b/.github/workflows/generate_grpc_cache.yaml @@ -84,7 +84,7 @@ jobs: build-args: | GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }} GRPC_MAKEFLAGS=--jobs=4 --output-sync=target - GRPC_VERSION=v1.58.0 + GRPC_VERSION=v1.63.0 context: . file: ./Dockerfile cache-to: type=gha,ignore-error=true diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 7d60d23a..4b5ebecd 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -218,7 +218,7 @@ jobs: BASE_IMAGE=${{ inputs.base-image }} GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} GRPC_MAKEFLAGS=--jobs=4 --output-sync=target - GRPC_VERSION=v1.58.0 + GRPC_VERSION=v1.63.0 MAKEFLAGS=${{ inputs.makeflags }} context: . 
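# (the v1.63.0 pin here is deliberate: the workflows and the conda/pip environments changed in this commit all pin the same gRPC release, so the compiled C++ gRPC and the grpcio/grpcio-tools wheels stay in step)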
file: ./Dockerfile diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index dc887fc1..364307f1 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -5,7 +5,7 @@ on: - pull_request env: - GRPC_VERSION: v1.58.0 + GRPC_VERSION: v1.63.0 permissions: contents: write diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index f9476d4d..1bd342e6 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -34,7 +34,7 @@ jobs: sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev - pip install --user grpcio-tools + pip install --user grpcio-tools==1.63.0 sudo rm -rfv /usr/bin/conda || true @@ -64,7 +64,7 @@ jobs: sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev - pip install --user grpcio-tools + pip install --user grpcio-tools==1.63.0 sudo rm -rfv /usr/bin/conda || true @@ -95,7 +95,7 @@ jobs: sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev - pip install --user grpcio-tools + pip install --user grpcio-tools==1.63.0 sudo rm -rfv /usr/bin/conda || true @@ -125,7 +125,7 @@ jobs: sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev - pip install --user grpcio-tools + pip install --user grpcio-tools==1.63.0 sudo rm -rfv /usr/bin/conda || true @@ -155,7 +155,7 @@ jobs: sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev - pip install --user grpcio-tools + pip install --user grpcio-tools==1.63.0 sudo rm -rfv /usr/bin/conda || true @@ -185,7 +185,7 @@ jobs: sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev - pip install --user grpcio-tools + pip install --user grpcio-tools==1.63.0 sudo rm -rfv /usr/bin/conda || true @@ -217,7 +217,7 @@ jobs: # sudo apt-get install -y conda # sudo apt-get install -y ca-certificates cmake curl patch python3-pip # sudo apt-get install -y libopencv-dev - # pip install --user grpcio-tools + # pip install --user grpcio-tools==1.63.0 # sudo rm -rfv /usr/bin/conda || true @@ -289,7 +289,7 @@ jobs: # sudo apt-get install -y conda # sudo apt-get install -y ca-certificates cmake curl patch python3-pip # sudo apt-get install -y libopencv-dev - # pip install --user grpcio-tools + # pip install --user grpcio-tools==1.63.0 # sudo rm -rfv /usr/bin/conda || true @@ -322,7 +322,7 @@ jobs: # sudo apt-get install -y conda # sudo apt-get install -y ca-certificates cmake curl patch python3-pip # sudo apt-get install -y libopencv-dev - # pip install --user grpcio-tools + # pip install --user grpcio-tools==1.63.0 # sudo rm -rfv /usr/bin/conda || true # - name: Test vllm # run: | @@ -349,7 +349,7 @@ jobs: sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev - pip install --user grpcio-tools + pip install --user grpcio-tools==1.63.0 sudo rm -rfv /usr/bin/conda || true - name: Test vall-e-x run: | @@ -376,7 +376,7 @@ jobs: sudo apt-get update && \ sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip - pip install --user grpcio-tools + pip 
install --user grpcio-tools==1.63.0 sudo rm -rfv /usr/bin/conda || true - name: Test coqui diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f50479e1..6fa003b3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,7 +10,7 @@ on: - '*' env: - GRPC_VERSION: v1.58.0 + GRPC_VERSION: v1.63.0 concurrency: group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }} @@ -203,7 +203,7 @@ jobs: - name: Dependencies run: | brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc - pip install --user grpcio-tools + pip install --user grpcio-tools==1.63.0 - name: Test run: | export C_INCLUDE_PATH=/usr/local/include diff --git a/backend/python/autogptq/autogptq.yml b/backend/python/autogptq/autogptq.yml index d22b354e..1d11c998 100644 --- a/backend/python/autogptq/autogptq.yml +++ b/backend/python/autogptq/autogptq.yml @@ -41,7 +41,7 @@ dependencies: - filelock==3.12.4 - frozenlist==1.4.0 - fsspec==2023.6.0 - - grpcio==1.59.0 + - grpcio==1.63.0 - huggingface-hub==0.16.4 - idna==3.4 - jinja2==3.1.2 diff --git a/backend/python/common-env/transformers/transformers-nvidia.yml b/backend/python/common-env/transformers/transformers-nvidia.yml index 16e494c5..cf9f2eab 100644 --- a/backend/python/common-env/transformers/transformers-nvidia.yml +++ b/backend/python/common-env/transformers/transformers-nvidia.yml @@ -47,7 +47,7 @@ dependencies: - frozenlist==1.4.0 - fsspec==2023.6.0 - funcy==2.0 - - grpcio==1.59.0 + - grpcio==1.63.0 - huggingface-hub - idna==3.4 - jinja2==3.1.2 diff --git a/backend/python/common-env/transformers/transformers-rocm.yml b/backend/python/common-env/transformers/transformers-rocm.yml index cdefcc27..3fcc407d 100644 --- a/backend/python/common-env/transformers/transformers-rocm.yml +++ b/backend/python/common-env/transformers/transformers-rocm.yml @@ -48,7 +48,7 @@ dependencies: - frozenlist==1.4.0 - fsspec==2023.6.0 - funcy==2.0 - - grpcio==1.59.0 + - grpcio==1.63.0 - huggingface-hub - idna==3.4 - jinja2==3.1.2 diff --git a/backend/python/common-env/transformers/transformers.yml b/backend/python/common-env/transformers/transformers.yml index 5f4e85b9..4cc66b11 100644 --- a/backend/python/common-env/transformers/transformers.yml +++ b/backend/python/common-env/transformers/transformers.yml @@ -47,7 +47,7 @@ dependencies: - frozenlist==1.4.0 - fsspec==2023.6.0 - funcy==2.0 - - grpcio==1.59.0 + - grpcio==1.63.0 - huggingface-hub - humanfriendly==10.0 - idna==3.4 diff --git a/backend/python/diffusers/diffusers-rocm.yml b/backend/python/diffusers/diffusers-rocm.yml index 97b2ce0f..fc1ad08c 100644 --- a/backend/python/diffusers/diffusers-rocm.yml +++ b/backend/python/diffusers/diffusers-rocm.yml @@ -34,7 +34,7 @@ dependencies: - diffusers==0.24.0 - filelock==3.12.4 - fsspec==2023.9.2 - - grpcio==1.59.0 + - grpcio==1.63.0 - huggingface-hub>=0.19.4 - idna==3.4 - importlib-metadata==6.8.0 diff --git a/backend/python/diffusers/diffusers.yml b/backend/python/diffusers/diffusers.yml index d5d2913e..60c28db9 100644 --- a/backend/python/diffusers/diffusers.yml +++ b/backend/python/diffusers/diffusers.yml @@ -32,7 +32,7 @@ dependencies: - diffusers==0.24.0 - filelock==3.12.4 - fsspec==2023.9.2 - - grpcio==1.59.0 + - grpcio==1.63.0 - huggingface-hub>=0.19.4 - idna==3.4 - importlib-metadata==6.8.0 diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh index d83ec0be..0b6607dc 100755 --- a/backend/python/diffusers/install.sh +++ b/backend/python/diffusers/install.sh @@ -31,8 +31,8 @@ if [ -d 
"/opt/intel" ]; then --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ pip install google-api-python-client \ - grpcio \ - grpcio-tools \ + grpcio==1.63.0 \ + grpcio-tools==1.63.0 \ diffusers==0.24.0 \ transformers>=4.25.1 \ accelerate \ diff --git a/backend/python/exllama/exllama.yml b/backend/python/exllama/exllama.yml index 0a30ee91..80f52af5 100644 --- a/backend/python/exllama/exllama.yml +++ b/backend/python/exllama/exllama.yml @@ -27,7 +27,7 @@ dependencies: - pip: - filelock==3.12.4 - fsspec==2023.9.2 - - grpcio==1.59.0 + - grpcio==1.63.0 - jinja2==3.1.2 - markupsafe==2.1.3 - mpmath==1.3.0 diff --git a/backend/python/exllama2/exllama2.yml b/backend/python/exllama2/exllama2.yml index d9060312..678d36a5 100644 --- a/backend/python/exllama2/exllama2.yml +++ b/backend/python/exllama2/exllama2.yml @@ -27,7 +27,7 @@ dependencies: - pip: - filelock==3.12.4 - fsspec==2023.9.2 - - grpcio==1.59.0 + - grpcio==1.63.0 - markupsafe==2.1.3 - mpmath==1.3.0 - networkx==3.1 diff --git a/backend/python/parler-tts/parler-nvidia.yml b/backend/python/parler-tts/parler-nvidia.yml index ed925e94..28ffd14c 100644 --- a/backend/python/parler-tts/parler-nvidia.yml +++ b/backend/python/parler-tts/parler-nvidia.yml @@ -26,7 +26,7 @@ dependencies: - zlib=1.2.13=h5eee18b_0 - pip: - accelerate>=0.11.0 - - grpcio==1.59.0 + - grpcio==1.63.0 - numpy==1.26.0 - nvidia-cublas-cu12==12.1.3.1 - nvidia-cuda-cupti-cu12==12.1.105 diff --git a/backend/python/parler-tts/parler.yml b/backend/python/parler-tts/parler.yml index fd0c3cb6..a3028fe1 100644 --- a/backend/python/parler-tts/parler.yml +++ b/backend/python/parler-tts/parler.yml @@ -27,7 +27,7 @@ dependencies: - pip: - accelerate>=0.11.0 - numpy==1.26.0 - - grpcio==1.59.0 + - grpcio==1.63.0 - torch==2.1.0 - transformers>=4.34.0 - descript-audio-codec diff --git a/backend/python/vall-e-x/ttsvalle.yml b/backend/python/vall-e-x/ttsvalle.yml index e235bf4e..09dbd946 100644 --- a/backend/python/vall-e-x/ttsvalle.yml +++ b/backend/python/vall-e-x/ttsvalle.yml @@ -42,7 +42,7 @@ dependencies: - future==0.18.3 - gradio==3.47.1 - gradio-client==0.6.0 - - grpcio==1.59.0 + - grpcio==1.63.0 - h11==0.14.0 - httpcore==0.18.0 - httpx==0.25.0 From 970cb3a2196d426dd847a25de69edbbce48c0c42 Mon Sep 17 00:00:00 2001 From: Chris Jowett <421501+cryptk@users.noreply.github.com> Date: Tue, 30 Apr 2024 11:22:44 -0500 Subject: [PATCH 0430/2895] chore: update go-stablediffusion to latest commit with Make jobserver fix Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0096d3f2..bba03c90 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759 # stablediffusion version -STABLEDIFFUSION_VERSION?=433ea6d9b64d9d08067324a757ef07040ea29568 +STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f # tinydream version TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057 From cd31f8d865031be56c8eb3843e4ba8fd45d0431c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 30 Apr 2024 23:24:13 +0200 Subject: [PATCH 0431/2895] models(gallery): add lexifun (#2193) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index e510e97e..cbd51a76 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -314,6 +314,26 @@ - 
filename: Average_Normie_l3_v1_8B-Q4_K_M-imat.gguf sha256: 9e98cd2672f716a0872912fdc4877969efd14d6f682f28e156f8591591c00d9c uri: huggingface://Lewdiculous/Average_Normie_l3_v1_8B-GGUF-IQ-Imatrix/Average_Normie_l3_v1_8B-Q4_K_M-imat.gguf +- <<: *llama3 + name: "llama-3-8b-lexifun-uncensored-v1" + icon: "https://cdn-uploads.huggingface.co/production/uploads/644ad182f434a6a63b18eee6/GrOs1IPG5EXR3MOCtcQiz.png" + license: llama3 + urls: + - https://huggingface.co/Orenguteng/Llama-3-8B-LexiFun-Uncensored-V1-GGUF + - https://huggingface.co/Orenguteng/LexiFun-Llama-3-8B-Uncensored-V1 + description: | + This is the GGUF version of https://huggingface.co/Orenguteng/LexiFun-Llama-3-8B-Uncensored-V1 + + Oh, you want to know who I am? Well, I'm LexiFun, the human equivalent of a chocolate chip cookie - warm, gooey, and guaranteed to make you smile! 🍪 I'm like the friend who always has a witty comeback, a sarcastic remark, and a healthy dose of humor to brighten up even the darkest of days. And by 'healthy dose,' I mean I'm basically a walking pharmacy of laughter. You might need to take a few extra doses to fully recover from my jokes, but trust me, it's worth it! 🏥 + + So, what can I do? I can make you laugh so hard you snort your coffee out your nose, I can make you roll your eyes so hard they get stuck that way, and I can make you wonder if I'm secretly a stand-up comedian who forgot their act. 🤣 But seriously, I'm here to spread joy, one sarcastic comment at a time. And if you're lucky, I might even throw in a few dad jokes for good measure! 🤴‍♂️ Just don't say I didn't warn you. 😏 + overrides: + parameters: + model: LexiFun-Llama-3-8B-Uncensored-V1_Q4_K_M.gguf + files: + - filename: LexiFun-Llama-3-8B-Uncensored-V1_Q4_K_M.gguf + sha256: 961a3fb75537d650baf14dce91d40df418ec3d481b51ab2a4f44ffdfd6b5900f + uri: huggingface://Orenguteng/Llama-3-8B-LexiFun-Uncensored-V1-GGUF/LexiFun-Llama-3-8B-Uncensored-V1_Q4_K_M.gguf - <<: *llama3 name: "llama-3-unholy-8b:Q8_0" urls: From b24d44dc56049ab7c2298b3e745419b6b1bfdf1c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 30 Apr 2024 23:24:28 +0200 Subject: [PATCH 0432/2895] models(gallery): add suzume-llama-3-8B-multilingual-gguf (#2194) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index cbd51a76..dd7c92b5 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -399,6 +399,22 @@ - filename: Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf sha256: 265ded6a4f439bec160f394e3083a4a20e32ebb9d1d2d85196aaab23dab87fb2 uri: huggingface://Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix/Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf +- <<: *llama3 + name: "suzume-llama-3-8B-multilingual" + urls: + - https://huggingface.co/lightblue/suzume-llama-3-8B-multilingual-gguf + icon: https://cdn-uploads.huggingface.co/production/uploads/64b63f8ad57e02621dc93c8b/kg3QjQOde0X743csGJT-f.png + description: | + This is Suzume 8B, a multilingual finetune of Llama 3. + + Llama 3 has exhibited excellent performance on many English language benchmarks. However, it also seems to have been finetuned on mostly English data, meaning that it will respond in English, even if prompted in other languages.
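+ # (the override below pins the exact quantized GGUF artifact, with its sha256 checksum, that LocalAI will download for this entry)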
+ overrides: + parameters: + model: suzume-llama-3-8B-multilingual-Q4_K_M.gguf + files: + - filename: suzume-llama-3-8B-multilingual-Q4_K_M.gguf + sha256: be197a660e56e51a24a0e0fecd42047d1b24e1423afaafa14769541b331e3269 + uri: huggingface://lightblue/suzume-llama-3-8B-multilingual-gguf/ggml-model-Q4_K_M.gguf - &dolphin name: "dolphin-2.9-llama3-8b" url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" From 445cfd4db3f4b43fc558d0101e43c60b8c36d7fb Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 30 Apr 2024 23:24:41 +0200 Subject: [PATCH 0433/2895] models(gallery): add guillaumetell (#2195) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index dd7c92b5..966125c1 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -128,6 +128,13 @@ urls: - https://huggingface.co/bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF icon: https://vago-solutions.ai/wp-content/uploads/2024/04/Llama3-Pic.png + tags: + - llm + - gguf + - gpu + - cpu + - llama3 + - german description: | SauerkrautLM-llama-3-8B-Instruct @@ -701,7 +708,7 @@ - filename: "Phi-3-mini-4k-instruct-fp16.gguf" sha256: "ad9f8ff11cd096115adc8ff50befa22fc3da2718672ddd2ab30faccd70488605" uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf" -### START Hermes-2-Pro-Mistral +### START Hermes - &hermes-2-pro-mistral url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" name: "hermes-2-pro-mistral" @@ -767,7 +774,29 @@ - filename: "BioMistral-7B.Q4_K_M.gguf" sha256: "3a73107045dfe7e3f113b392b0a67e3e6ca9fa9dae2abe301424ce5abd1721a6" uri: "huggingface://MaziyarPanahi/BioMistral-7B-GGUF/BioMistral-7B.Q4_K_M.gguf" -### END Hermes-2-Pro-Mistral +- url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + name: "guillaumetell-7b" + license: apache-2 + description: | + Guillaume Tell est un Large Language Model (LLM) français basé sur Mistral Open-Hermes 2.5 optimisé pour le RAG (Retrieval Augmented Generation) avec traçabilité des sources et explicabilité. 
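+ # (English translation of the description above: Guillaume Tell is a French Large Language Model (LLM) based on Mistral OpenHermes 2.5, optimized for RAG (Retrieval Augmented Generation) with source traceability and explainability.)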
+ urls: + - https://huggingface.co/MaziyarPanahi/guillaumetell-7b-GGUF + - https://huggingface.co/AgentPublic/guillaumetell-7b + tags: + - llm + - gguf + - gpu + - cpu + - openhermes + - french + overrides: + context_size: 4096 + parameters: + model: guillaumetell-7b.Q4_K_M.gguf + files: + - filename: guillaumetell-7b.Q4_K_M.gguf + sha256: bf08db5281619335f3ee87e229c8533b04262790063b061bb8f275c3e4de7061 + uri: huggingface://MaziyarPanahi/guillaumetell-7b-GGUF/guillaumetell-7b.Q4_K_M.gguf ### START Cerbero - url: "github:mudler/LocalAI/gallery/cerbero.yaml@master" icon: https://huggingface.co/galatolo/cerbero-7b/resolve/main/README.md.d/cerbero.png @@ -781,6 +810,7 @@ - gpu - cpu - mistral + - italian overrides: parameters: model: galatolo-Q4_K.gguf From f90d56d3710e6a9bc4f875bdca39cd052cd5b0bb Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 30 Apr 2024 23:53:31 +0200 Subject: [PATCH 0434/2895] :arrow_up: Update ggerganov/llama.cpp (#2203) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bba03c90..5c8d0d97 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=b8c1476e44cc1f3a1811613f65251cf779067636 +CPPLLAMA_VERSION?=f364eb6fb5d46118a76fa045f487318de4c24961 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 962ebbaf7792e51c4106630d39b3b7a45134d751 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 1 May 2024 23:06:58 +0200 Subject: [PATCH 0435/2895] models(gallery): fixup phi-3 sha Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 966125c1..e81c8c05 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -697,7 +697,7 @@ model: Phi-3-mini-4k-instruct-q4.gguf files: - filename: "Phi-3-mini-4k-instruct-q4.gguf" - sha256: "4fed7364ee3e0c7cb4fe0880148bfdfcd1b630981efa0802a6b62ee52e7da97e" + sha256: "8a83c7fb9049a9b2e92266fa7ad04933bb53aa1e85136b7b30f1b8000ff2edef" uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf" - <<: *phi-3 name: "phi-3-mini-4k-instruct:fp16" From 6a7a7996bb8ae40866347476f904bbd75cc5620c Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 1 May 2024 23:19:44 +0200 Subject: [PATCH 0436/2895] :arrow_up: Update ggerganov/llama.cpp (#2213) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5c8d0d97..0f59a852 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=f364eb6fb5d46118a76fa045f487318de4c24961 +CPPLLAMA_VERSION?=8d608a81b7bd170f700648f8214e6f3279d4d715 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 4690b534e0a4283e329ef0446b06f261a5e99e1e Mon Sep 17 00:00:00 2001 From: fakezeta Date: Thu, 2 May 2024 09:54:29 +0200 Subject: [PATCH 0437/2895] feat: user defined inference device for 
CUDA and OpenVINO (#2212) user defined inference device configuration via main_gpu parameter --- .../transformers/transformers_server.py | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index 93b2ce25..f40b8951 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -89,8 +89,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): quantization = None if self.CUDA: - if request.Device: - device_map=request.Device + if request.MainGPU: + device_map=request.MainGPU else: device_map="cuda:0" if request.Quantization == "bnb_4bit": @@ -143,28 +143,36 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): from optimum.intel.openvino import OVModelForCausalLM from openvino.runtime import Core - if "GPU" in Core().available_devices: - device_map="GPU" + if request.MainGPU: + device_map=request.MainGPU else: - device_map="CPU" + device_map="AUTO" + devices = Core().available_devices + if "GPU" in " ".join(devices): + device_map="AUTO:GPU" + self.model = OVModelForCausalLM.from_pretrained(model_name, compile=True, trust_remote_code=request.TrustRemoteCode, - ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"}, + ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT","GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"}, device=device_map) self.OV = True elif request.Type == "OVModelForFeatureExtraction": from optimum.intel.openvino import OVModelForFeatureExtraction from openvino.runtime import Core - if "GPU" in Core().available_devices: - device_map="GPU" + if request.MainGPU: + device_map=request.MainGPU else: - device_map="CPU" + device_map="AUTO" + devices = Core().available_devices + if "GPU" in " ".join(devices): + device_map="AUTO:GPU" + self.model = OVModelForFeatureExtraction.from_pretrained(model_name, compile=True, trust_remote_code=request.TrustRemoteCode, - ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"}, + ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT", "GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"}, export=True, device=device_map) self.OV = True @@ -371,4 +379,4 @@ if __name__ == "__main__": ) args = parser.parse_args() - asyncio.run(serve(args.addr)) \ No newline at end of file + asyncio.run(serve(args.addr)) From e5bd9a76c7ae2738697a8d6f29eb32e39a5c079b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 2 May 2024 18:31:02 +0200 Subject: [PATCH 0438/2895] models(gallery): add wizardlm2 (#2209) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 27 +++++++++++++++++++++++++++ gallery/wizardlm2.yaml | 15 +++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 gallery/wizardlm2.yaml diff --git a/gallery/index.yaml b/gallery/index.yaml index e81c8c05..451182bf 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -520,6 +520,33 @@ - filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1 uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf +### START Vicuna based +- &wizardlm2 + url: "github:mudler/LocalAI/gallery/wizardlm2.yaml@master" + name: "wizardlm2-7b" + description: | + We introduce and opensource WizardLM-2, our next generation state-of-the-art large language models, which have improved performance on complex chat, multilingual, reasoning and agent. 
New family includes three cutting-edge models: WizardLM-2 8x22B, WizardLM-2 70B, and WizardLM-2 7B. + + WizardLM-2 8x22B is our most advanced model, demonstrates highly competitive performance compared to those leading proprietary works and consistently outperforms all the existing state-of-the-art opensource models. + WizardLM-2 70B reaches top-tier reasoning capabilities and is the first choice in the same size. + WizardLM-2 7B is the fastest and achieves comparable performance with existing 10x larger opensource leading models. + icon: https://github.com/nlpxucan/WizardLM/raw/main/imgs/WizardLM.png + license: apache-2.0 + urls: + - https://huggingface.co/MaziyarPanahi/WizardLM-2-7B-GGUF + tags: + - llm + - gguf + - gpu + - cpu + - mistral + overrides: + parameters: + model: WizardLM-2-7B.Q4_K_M.gguf + files: + - filename: WizardLM-2-7B.Q4_K_M.gguf + sha256: 613212417701a26fd43f565c5c424a2284d65b1fddb872b53a99ef8add796f64 + uri: huggingface://MaziyarPanahi/WizardLM-2-7B-GGUF/WizardLM-2-7B.Q4_K_M.gguf ### START LLaVa - &llava url: "github:mudler/LocalAI/gallery/llava.yaml@master" diff --git a/gallery/wizardlm2.yaml b/gallery/wizardlm2.yaml new file mode 100644 index 00000000..6c2c1411 --- /dev/null +++ b/gallery/wizardlm2.yaml @@ -0,0 +1,15 @@ +--- +name: "wizardlm2" + +config_file: | + mmap: true + template: + chat_message: |- + {{if eq .RoleName "assistant"}}ASSISTANT: {{.Content}}{{else if eq .RoleName "system"}}{{.Content}}{{else if eq .RoleName "user"}}USER: {{.Content}}{{end}} + chat: "{{.Input}}ASSISTANT: " + completion: |- + {{.Input}} + context_size: 32768 + f16: true + stopwords: + - From f7f8b4804b1eada8c5cd40963e7660fb9d4e1703 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 2 May 2024 18:31:13 +0200 Subject: [PATCH 0439/2895] models(gallery): Add Hermes-2-Pro-Llama-3-8B-GGUF (#2218) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 52 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 451182bf..c18f1ee4 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -787,6 +787,58 @@ - filename: "Hermes-2-Pro-Mistral-7B.Q8_0.gguf" sha256: "b6d95d7ec9a395b7568cc94b0447fd4f90b6f69d6e44794b1fbb84e3f732baca" uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q8_0.gguf" +### LLAMA3 version +- <<: *hermes-2-pro-mistral + name: "hermes-2-pro-llama-3-8b" + tags: + - llm + - gguf + - gpu + - llama3 + - cpu + urls: + - https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF + overrides: + parameters: + model: Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf + files: + - filename: "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf" + sha256: "afe41ab251d1fd9870dd9631f60c22b22c215166308b35d7e15faa3260fa4bd7" + uri: "huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf" +- <<: *hermes-2-pro-mistral + tags: + - llm + - gguf + - gpu + - llama3 + - cpu + name: "hermes-2-pro-llama-3-8b:Q5_K_M" + urls: + - https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF + overrides: + parameters: + model: Hermes-2-Pro-Llama-3-8B-Q5_K_M.gguf + files: + - filename: "Hermes-2-Pro-Llama-3-8B-Q5_K_M.gguf" + sha256: "2be39d775b2a64aa5bbdc1f96fa1703ec54b5fa8982c1732b7ae9d2b57c6bb43" + uri: "huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q5_K_M.gguf" +- <<: *hermes-2-pro-mistral + tags: + - llm + - gguf + - gpu + - llama3 + - cpu + name: "hermes-2-pro-llama-3-8b:Q8_0" + urls: + - 
https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF + overrides: + parameters: + model: Hermes-2-Pro-Llama-3-8B-Q8_0.gguf + files: + - filename: "Hermes-2-Pro-Llama-3-8B-Q8_0.gguf" + sha256: "0a8f471d6940dee972e579eebdb4d536174bda82b73463cd8ac7752a7b1973a3" + uri: "huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q8_0.gguf" - <<: *hermes-2-pro-mistral name: "biomistral-7b" description: | From 2c5a46bc34c919621a06ab4287af3053361d383c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 2 May 2024 21:14:10 +0200 Subject: [PATCH 0440/2895] feat(ux): Add chat, tts, and image-gen pages to the WebUI (#2222) * feat(webui): Add chat page Signed-off-by: Ettore Di Giacinto * feat(webui): Add image-gen page Signed-off-by: Ettore Di Giacinto * feat(webui): Add tts page Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- core/cli/run.go | 6 +- core/config/application_config.go | 6 +- core/http/app.go | 19 ++- core/http/routes/ui.go | 104 +++++++++++++++ core/http/routes/welcome.go | 19 --- core/http/static/chat.js | 141 ++++++++++++++++++++ core/http/static/general.css | 73 +++++++++++ core/http/static/image.js | 96 ++++++++++++++ core/http/static/tts.js | 64 +++++++++ core/http/views/chat.html | 189 +++++++++++++++++++++++++++ core/http/views/partials/head.html | 70 ++++------ core/http/views/partials/navbar.html | 3 + core/http/views/text2image.html | 89 +++++++++++++ core/http/views/tts.html | 86 ++++++++++++ 14 files changed, 890 insertions(+), 75 deletions(-) delete mode 100644 core/http/routes/welcome.go create mode 100644 core/http/static/chat.js create mode 100644 core/http/static/general.css create mode 100644 core/http/static/image.js create mode 100644 core/http/static/tts.js create mode 100644 core/http/views/chat.html create mode 100644 core/http/views/text2image.html create mode 100644 core/http/views/tts.html diff --git a/core/cli/run.go b/core/cli/run.go index 42185a28..6185627d 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -42,7 +42,7 @@ type RunCMD struct { CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"` UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"` APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. 
When this is set, all the requests must be authenticated with one of these API keys" group:"api"` - DisableWelcome bool `env:"LOCALAI_DISABLE_WELCOME,DISABLE_WELCOME" default:"false" help:"Disable welcome pages" group:"api"` + DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"` ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"` SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"` @@ -84,8 +84,8 @@ func (r *RunCMD) Run(ctx *Context) error { idleWatchDog := r.EnableWatchdogIdle busyWatchDog := r.EnableWatchdogBusy - if r.DisableWelcome { - opts = append(opts, config.DisableWelcomePage) + if r.DisableWebUI { + opts = append(opts, config.DisableWebUI) } if idleWatchDog || busyWatchDog { diff --git a/core/config/application_config.go b/core/config/application_config.go index 2d733c1e..398418ad 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -15,7 +15,7 @@ type ApplicationConfig struct { ConfigFile string ModelPath string UploadLimitMB, Threads, ContextSize int - DisableWelcomePage bool + DisableWebUI bool F16 bool Debug bool ImageDir string @@ -107,8 +107,8 @@ var EnableWatchDogBusyCheck = func(o *ApplicationConfig) { o.WatchDogBusy = true } -var DisableWelcomePage = func(o *ApplicationConfig) { - o.DisableWelcomePage = true +var DisableWebUI = func(o *ApplicationConfig) { + o.DisableWebUI = true } func SetWatchDogBusyTimeout(t time.Duration) AppOption { diff --git a/core/http/app.go b/core/http/app.go index 080535a4..19c9375f 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -1,7 +1,9 @@ package http import ( + "embed" "errors" + "net/http" "strings" "github.com/go-skynet/LocalAI/pkg/utils" @@ -18,6 +20,7 @@ import ( "github.com/gofiber/contrib/fiberzerolog" "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/cors" + "github.com/gofiber/fiber/v2/middleware/filesystem" "github.com/gofiber/fiber/v2/middleware/recover" // swagger handler @@ -42,6 +45,11 @@ func readAuthHeader(c *fiber.Ctx) string { return authHeader } +// Embed a directory +// +//go:embed static/* +var embedDirStatic embed.FS + // @title LocalAI API // @version 2.0.0 // @description The LocalAI Rest API. @@ -169,10 +177,17 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig, auth) routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService, auth) routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth) - routes.RegisterPagesRoutes(app, cl, ml, appConfig, auth) - routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth) + if !appConfig.DisableWebUI { + routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth) + } routes.RegisterJINARoutes(app, cl, ml, appConfig, auth) + app.Use("/static", filesystem.New(filesystem.Config{ + Root: http.FS(embedDirStatic), + PathPrefix: "static", + Browse: true, + })) + // Define a custom 404 handler // Note: keep this at the bottom! 
app.Use(notFoundHandler) diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index 2b8c6b95..70715823 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -7,7 +7,9 @@ import ( "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/http/elements" + "github.com/go-skynet/LocalAI/core/http/endpoints/localai" "github.com/go-skynet/LocalAI/core/services" + "github.com/go-skynet/LocalAI/internal" "github.com/go-skynet/LocalAI/pkg/gallery" "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/xsync" @@ -23,6 +25,8 @@ func RegisterUIRoutes(app *fiber.App, galleryService *services.GalleryService, auth func(*fiber.Ctx) error) { + app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml)) + // keeps the state of models that are being installed from the UI var installingModels = xsync.NewSyncedMap[string, string]() @@ -32,6 +36,7 @@ func RegisterUIRoutes(app *fiber.App, summary := fiber.Map{ "Title": "LocalAI - Models", + "Version": internal.PrintableVersion(), "Models": template.HTML(elements.ListModels(models, installingModels)), "Repositories": appConfig.Galleries, // "ApplicationConfig": appConfig, @@ -166,4 +171,103 @@ func RegisterUIRoutes(app *fiber.App, return c.SendString(elements.DoneProgress(c.Params("uid"), displayText)) }) + + // Show the Chat page + app.Get("/chat/:model", auth, func(c *fiber.Ctx) error { + backendConfigs := cl.GetAllBackendConfigs() + + summary := fiber.Map{ + "Title": "LocalAI - Chat with " + c.Params("model"), + "ModelsConfig": backendConfigs, + "Model": c.Params("model"), + "Version": internal.PrintableVersion(), + } + + // Render index + return c.Render("views/chat", summary) + }) + app.Get("/chat/", auth, func(c *fiber.Ctx) error { + + backendConfigs := cl.GetAllBackendConfigs() + + if len(backendConfigs) == 0 { + return c.SendString("No models available") + } + + summary := fiber.Map{ + "Title": "LocalAI - Chat with " + backendConfigs[0].Name, + "ModelsConfig": backendConfigs, + "Model": backendConfigs[0].Name, + "Version": internal.PrintableVersion(), + } + + // Render index + return c.Render("views/chat", summary) + }) + + app.Get("/text2image/:model", auth, func(c *fiber.Ctx) error { + backendConfigs := cl.GetAllBackendConfigs() + + summary := fiber.Map{ + "Title": "LocalAI - Generate images with " + c.Params("model"), + "ModelsConfig": backendConfigs, + "Model": c.Params("model"), + "Version": internal.PrintableVersion(), + } + + // Render index + return c.Render("views/text2image", summary) + }) + + app.Get("/text2image/", auth, func(c *fiber.Ctx) error { + + backendConfigs := cl.GetAllBackendConfigs() + + if len(backendConfigs) == 0 { + return c.SendString("No models available") + } + + summary := fiber.Map{ + "Title": "LocalAI - Generate images with " + backendConfigs[0].Name, + "ModelsConfig": backendConfigs, + "Model": backendConfigs[0].Name, + "Version": internal.PrintableVersion(), + } + + // Render index + return c.Render("views/text2image", summary) + }) + + app.Get("/tts/:model", auth, func(c *fiber.Ctx) error { + backendConfigs := cl.GetAllBackendConfigs() + + summary := fiber.Map{ + "Title": "LocalAI - Generate images with " + c.Params("model"), + "ModelsConfig": backendConfigs, + "Model": c.Params("model"), + "Version": internal.PrintableVersion(), + } + + // Render index + return c.Render("views/tts", summary) + }) + + app.Get("/tts/", auth, func(c *fiber.Ctx) error { + + backendConfigs := cl.GetAllBackendConfigs() + + if len(backendConfigs) == 0 { + return c.SendString("No 
models available") + } + + summary := fiber.Map{ + "Title": "LocalAI - Generate audio with " + backendConfigs[0].Name, + "ModelsConfig": backendConfigs, + "Model": backendConfigs[0].Name, + "Version": internal.PrintableVersion(), + } + + // Render index + return c.Render("views/tts", summary) + }) } diff --git a/core/http/routes/welcome.go b/core/http/routes/welcome.go deleted file mode 100644 index 6b600d2d..00000000 --- a/core/http/routes/welcome.go +++ /dev/null @@ -1,19 +0,0 @@ -package routes - -import ( - "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/http/endpoints/localai" - "github.com/go-skynet/LocalAI/pkg/model" - "github.com/gofiber/fiber/v2" -) - -func RegisterPagesRoutes(app *fiber.App, - cl *config.BackendConfigLoader, - ml *model.ModelLoader, - appConfig *config.ApplicationConfig, - auth func(*fiber.Ctx) error) { - - if !appConfig.DisableWelcomePage { - app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml)) - } -} diff --git a/core/http/static/chat.js b/core/http/static/chat.js new file mode 100644 index 00000000..48017d60 --- /dev/null +++ b/core/http/static/chat.js @@ -0,0 +1,141 @@ +/* + +https://github.com/david-haerer/chatapi + +MIT License + +Copyright (c) 2023 David Härer +Copyright (c) 2024 Ettore Di Giacinto + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +*/ +function submitKey(event) { + event.preventDefault(); + localStorage.setItem("key", document.getElementById("apiKey").value); + document.getElementById("apiKey").blur(); + } + +function submitPrompt(event) { + event.preventDefault(); + + const input = document.getElementById("input").value; + Alpine.store("chat").add("user", input); + document.getElementById("input").value = ""; + const key = localStorage.getItem("key"); + + if (input.startsWith("!img")) { + promptDallE(key, input.slice(4)); + } else { + promptGPT(key, input); + } +} + + + async function promptGPT(key, input) { + const model = document.getElementById("chat-model").value; + // Set class "loader" to the element with "loader" id + //document.getElementById("loader").classList.add("loader"); + // Make the "loader" visible + document.getElementById("loader").style.display = "block"; + document.getElementById("input").disabled = true; + document.getElementById('messages').scrollIntoView(false) + + // Source: https://stackoverflow.com/a/75751803/11386095 + const response = await fetch("/v1/chat/completions", { + method: "POST", + headers: { + Authorization: `Bearer ${key}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: model, + messages: Alpine.store("chat").messages(), + stream: true, + }), + }); + + if (!response.ok) { + Alpine.store("chat").add( + "assistant", + `Error: POST /v1/chat/completions ${response.status}`, + ); + return; + } + + const reader = response.body + ?.pipeThrough(new TextDecoderStream()) + .getReader(); + + if (!reader) { + Alpine.store("chat").add( + "assistant", + `Error: Failed to decode API response`, + ); + return; + } + + while (true) { + const { value, done } = await reader.read(); + if (done) break; + let dataDone = false; + const arr = value.split("\n"); + arr.forEach((data) => { + if (data.length === 0) return; + if (data.startsWith(":")) return; + if (data === "data: [DONE]") { + dataDone = true; + return; + } + const token = JSON.parse(data.substring(6)).choices[0].delta.content; + if (!token) { + return; + } + hljs.highlightAll(); + Alpine.store("chat").add("assistant", token); + document.getElementById('messages').scrollIntoView(false) + }); + hljs.highlightAll(); + if (dataDone) break; + } + // Remove class "loader" from the element with "loader" id + //document.getElementById("loader").classList.remove("loader"); + document.getElementById("loader").style.display = "none"; + // enable input + document.getElementById("input").disabled = false; + // scroll to the bottom of the chat + document.getElementById('messages').scrollIntoView(false) + // set focus to the input + document.getElementById("input").focus(); + } + + document.getElementById("key").addEventListener("submit", submitKey); + document.getElementById("prompt").addEventListener("submit", submitPrompt); + document.getElementById("input").focus(); + + const storeKey = localStorage.getItem("key"); + if (storeKey) { + document.getElementById("apiKey").value = storeKey; + } + + marked.setOptions({ + highlight: function (code) { + return hljs.highlightAuto(code).value; + }, + }); diff --git a/core/http/static/general.css b/core/http/static/general.css new file mode 100644 index 00000000..40d67fb4 --- /dev/null +++ b/core/http/static/general.css @@ -0,0 +1,73 @@ +body { + font-family: 'Inter', sans-serif; +} +.chat-container { height: 90vh; display: flex; flex-direction: column; } +.chat-messages { overflow-y: auto; flex-grow: 1; } +.htmx-indicator{ + opacity:0; + transition: opacity 10ms ease-in; +} 
+.htmx-request .htmx-indicator{ + opacity:1 +} +/* Loader (https://cssloaders.github.io/) */ +.loader { + width: 12px; + height: 12px; + border-radius: 50%; + display: block; + margin:15px auto; + position: relative; + color: #FFF; + box-sizing: border-box; + animation: animloader 2s linear infinite; +} + +@keyframes animloader { + 0% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 -2px; } + 25% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 2px; } + 50% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 2px, -38px 0 0 -2px; } + 75% { box-shadow: 14px 0 0 2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 -2px; } + 100% { box-shadow: 14px 0 0 -2px, 38px 0 0 2px, -14px 0 0 -2px, -38px 0 0 -2px; } +} +.progress { + height: 20px; + margin-bottom: 20px; + overflow: hidden; + background-color: #f5f5f5; + border-radius: 4px; + box-shadow: inset 0 1px 2px rgba(0,0,0,.1); +} +.progress-bar { + float: left; + width: 0%; + height: 100%; + font-size: 12px; + line-height: 20px; + color: #fff; + text-align: center; + background-color: #337ab7; + -webkit-box-shadow: inset 0 -1px 0 rgba(0,0,0,.15); + box-shadow: inset 0 -1px 0 rgba(0,0,0,.15); + -webkit-transition: width .6s ease; + -o-transition: width .6s ease; + transition: width .6s ease; +} + +.user { + background-color: #007bff; +} + +.assistant { + background-color: #28a745; +} + +.message { + display: flex; + align-items: center; +} + +.user, .assistant { + flex-grow: 1; + margin: 0.5rem; +} diff --git a/core/http/static/image.js b/core/http/static/image.js new file mode 100644 index 00000000..315bdda0 --- /dev/null +++ b/core/http/static/image.js @@ -0,0 +1,96 @@ +/* + +https://github.com/david-haerer/chatapi + +MIT License + +Copyright (c) 2023 David Härer +Copyright (c) 2024 Ettore Di Giacinto + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +*/ +function submitKey(event) { + event.preventDefault(); + localStorage.setItem("key", document.getElementById("apiKey").value); + document.getElementById("apiKey").blur(); + } + + +function genImage(event) { + event.preventDefault(); + const input = document.getElementById("input").value; + const key = localStorage.getItem("key"); + + promptDallE(key, input); + +} + +async function promptDallE(key, input) { + document.getElementById("loader").style.display = "block"; + document.getElementById("input").value = ""; + document.getElementById("input").disabled = true; + + const model = document.getElementById("image-model").value; + const response = await fetch("/v1/images/generations", { + method: "POST", + headers: { + Authorization: `Bearer ${key}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: model, + steps: 10, + prompt: input, + n: 1, + size: "512x512", + }), + }); + const json = await response.json(); + if (json.error) { + // Display error if there is one + var div = document.getElementById('result'); // Get the div by its ID + div.innerHTML = '
<span>' + json.error.message + '</span>
'; + return; + } + const url = json.data[0].url; + + var div = document.getElementById('result'); // Get the div by its ID + var img = document.createElement('img'); // Create a new img element + img.src = url; // Set the source of the image + img.alt = 'Generated image'; // Set the alt text of the image + + div.innerHTML = ''; // Clear the existing content of the div + div.appendChild(img); // Add the new img element to the div + + document.getElementById("loader").style.display = "none"; + document.getElementById("input").disabled = false; + document.getElementById("input").focus(); +} + +document.getElementById("key").addEventListener("submit", submitKey); +document.getElementById("input").focus(); +document.getElementById("genimage").addEventListener("submit", genImage); +document.getElementById("loader").style.display = "none"; + +const storeKey = localStorage.getItem("key"); +if (storeKey) { + document.getElementById("apiKey").value = storeKey; +} + diff --git a/core/http/static/tts.js b/core/http/static/tts.js new file mode 100644 index 00000000..7fc74729 --- /dev/null +++ b/core/http/static/tts.js @@ -0,0 +1,64 @@ +function submitKey(event) { + event.preventDefault(); + localStorage.setItem("key", document.getElementById("apiKey").value); + document.getElementById("apiKey").blur(); + } + + +function genAudio(event) { + event.preventDefault(); + const input = document.getElementById("input").value; + const key = localStorage.getItem("key"); + + tts(key, input); +} + +async function tts(key, input) { + document.getElementById("loader").style.display = "block"; + document.getElementById("input").value = ""; + document.getElementById("input").disabled = true; + + const model = document.getElementById("tts-model").value; + const response = await fetch("/tts", { + method: "POST", + headers: { + Authorization: `Bearer ${key}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: model, + input: input, + }), + }); + if (!response.ok) { + const jsonData = await response.json(); // Now safely parse JSON + var div = document.getElementById('result'); + div.innerHTML = '
<span>Error: ' + jsonData.error.message + '</span>
'; + return; + } + + var div = document.getElementById('result'); // Get the div by its ID + var link=document.createElement('a'); + link.className = "m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"; + link.innerHTML = " Download result"; + const blob = await response.blob(); + link.href=window.URL.createObjectURL(blob); + + div.innerHTML = ''; // Clear the existing content of the div + div.appendChild(link); // Add the new img element to the div + console.log(link) + document.getElementById("loader").style.display = "none"; + document.getElementById("input").disabled = false; + document.getElementById("input").focus(); +} + +document.getElementById("key").addEventListener("submit", submitKey); +document.getElementById("input").focus(); +document.getElementById("tts").addEventListener("submit", genAudio); +document.getElementById("loader").style.display = "none"; + +const storeKey = localStorage.getItem("key"); +if (storeKey) { + document.getElementById("apiKey").value = storeKey; +} + diff --git a/core/http/views/chat.html b/core/http/views/chat.html new file mode 100644 index 00000000..1a14bbc3 --- /dev/null +++ b/core/http/views/chat.html @@ -0,0 +1,189 @@ + + + + {{template "views/partials/head" .}} + + + +
+    {{template "views/partials/navbar"}}
+    <!-- [chat.html markup was stripped during extraction; the recoverable
+         content is a "Chat with {{.Model}}" header with a model selector,
+         the hint "Start chatting with the AI by typing a prompt in the input
+         field below.", the message area, a loader, and the prompt input
+         form] -->
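The chat view is a thin front end over the OpenAI-compatible API: chat.js, added above, posts the running message history to /v1/chat/completions with stream: true and renders tokens as they arrive. A minimal sketch of the same call outside the page (the host, port, and model name below are assumptions, not values taken from this patch):

    // Sketch only: assumes a LocalAI instance on localhost:8080 and an
    // installed model named "gpt-4". SSE lines can split across chunks;
    // chat.js tolerates this and the sketch ignores it for brevity.
    async function streamChat(prompt) {
      const response = await fetch("http://localhost:8080/v1/chat/completions", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          model: "gpt-4",
          messages: [{ role: "user", content: prompt }],
          stream: true,
        }),
      });
      const reader = response.body.pipeThrough(new TextDecoderStream()).getReader();
      while (true) {
        const { value, done } = await reader.read();
        if (done) break;
        for (const line of value.split("\n")) {
          if (!line.startsWith("data: ") || line === "data: [DONE]") continue;
          const token = JSON.parse(line.slice(6)).choices[0].delta.content;
          if (token) console.log(token); // chat.js instead appends to the Alpine store
        }
      }
    }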
+ + diff --git a/core/http/views/partials/head.html b/core/http/views/partials/head.html index 9dbfecdb..c0bc4134 100644 --- a/core/http/views/partials/head.html +++ b/core/http/views/partials/head.html @@ -2,6 +2,28 @@ {{.Title}} + + + + + + + - \ No newline at end of file diff --git a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html index 36332ed2..6b4bb76d 100644 --- a/core/http/views/partials/navbar.html +++ b/core/http/views/partials/navbar.html @@ -10,6 +10,9 @@ Home Documentation Models + Chat + Generate images + TTS API
diff --git a/core/http/views/text2image.html b/core/http/views/text2image.html new file mode 100644 index 00000000..1e412933 --- /dev/null +++ b/core/http/views/text2image.html @@ -0,0 +1,89 @@ + + +{{template "views/partials/head" .}} + + + +
+    {{template "views/partials/navbar" .}}
+    <!-- [text2image.html markup was stripped during extraction; the
+         recoverable content is a "🖼️ Text to Image" header with a model
+         selector, the prompt input form, a loader, and the result
+         container] -->
+    {{template "views/partials/footer" .}}
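image.js drives the /v1/images/generations endpoint with the page's fixed defaults (10 steps, one 512x512 image) and swaps the returned URL into the result container. Roughly the same request as a standalone sketch (the host and the "stablediffusion" model name are placeholders, not values this patch installs):

    // Sketch only: mirrors the payload image.js builds above.
    async function generateImage(prompt) {
      const response = await fetch("http://localhost:8080/v1/images/generations", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          model: "stablediffusion", // hypothetical model name
          steps: 10,
          prompt: prompt,
          n: 1,
          size: "512x512",
        }),
      });
      const json = await response.json();
      if (json.error) throw new Error(json.error.message);
      return json.data[0].url; // image.js sets this URL as the image source
    }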
+ + diff --git a/core/http/views/tts.html b/core/http/views/tts.html new file mode 100644 index 00000000..a60467d5 --- /dev/null +++ b/core/http/views/tts.html @@ -0,0 +1,86 @@ + + +{{template "views/partials/head" .}} + + + +
+    {{template "views/partials/navbar" .}}
+    <!-- [tts.html markup was stripped during extraction; the recoverable
+         content is a "Text to speech/audio" header with a model selector,
+         the text input form, a loader, and the result container] -->
+    {{template "views/partials/footer" .}}
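Unlike the chat and image pages, tts.js does not call an OpenAI-style route: it posts {model, input} to LocalAI's own /tts endpoint, and the response body is the audio file itself, which the page wraps in an object URL behind a download link. A standalone sketch (the host and the voice model name are assumptions):

    // Sketch only: the reply is binary audio, not JSON; "voice-en-us" is a
    // hypothetical voice model name.
    async function synthesize(text) {
      const response = await fetch("http://localhost:8080/tts", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ model: "voice-en-us", input: text }),
      });
      if (!response.ok) {
        throw new Error((await response.json()).error.message);
      }
      return await response.blob(); // e.g. pass to URL.createObjectURL()
    }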
+ + From 2cc1bd85af27902d58f24301ef2c11efdd63b7b8 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 2 May 2024 23:23:40 +0200 Subject: [PATCH 0441/2895] :arrow_up: Update ggerganov/llama.cpp (#2224) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0f59a852..23570c77 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=8d608a81b7bd170f700648f8214e6f3279d4d715 +CPPLLAMA_VERSION?=6ecf3189e00a1e8e737a78b6d10e1d7006e050a2 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From a31d00d904a7f762c0bd561c84d62fc915aefeb3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 3 May 2024 00:41:45 +0200 Subject: [PATCH 0442/2895] feat(aio): switch to llama3-based for LLM (#2225) Signed-off-by: mudler --- aio/cpu/text-to-text.yaml | 2 +- aio/gpu-8g/text-to-text.yaml | 2 +- aio/intel/text-to-text.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml index f2f6aeb4..902b9683 100644 --- a/aio/cpu/text-to-text.yaml +++ b/aio/cpu/text-to-text.yaml @@ -1,7 +1,7 @@ name: gpt-4 mmap: true parameters: - model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q2_K.gguf + model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf template: chat_message: | diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml index dc620a13..902b9683 100644 --- a/aio/gpu-8g/text-to-text.yaml +++ b/aio/gpu-8g/text-to-text.yaml @@ -1,7 +1,7 @@ name: gpt-4 mmap: true parameters: - model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf + model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf template: chat_message: | diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml index bd6b87ba..bc11d4d7 100644 --- a/aio/intel/text-to-text.yaml +++ b/aio/intel/text-to-text.yaml @@ -2,7 +2,7 @@ name: gpt-4 mmap: false f16: false parameters: - model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf + model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf template: chat_message: | From b58274b8a26a3d22605e3c484cf39c5dd9a5cf8e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 3 May 2024 00:43:02 +0200 Subject: [PATCH 0443/2895] feat(ui): support multilineand style `ul` (#2226) * feat(ui/chat): handle multiline in the input field Signed-off-by: mudler * feat(ui/chat): correctly display multiline messages Signed-off-by: mudler * feat(ui/chat): add list style Signed-off-by: mudler --------- Signed-off-by: mudler --- core/http/static/chat.js | 6 +---- core/http/static/general.css | 20 ++++++++++++++++ core/http/views/chat.html | 45 +++++++++++++++++++++++------------- 3 files changed, 50 insertions(+), 21 deletions(-) diff --git a/core/http/static/chat.js b/core/http/static/chat.js index 48017d60..db7e7856 100644 --- a/core/http/static/chat.js +++ b/core/http/static/chat.js @@ -40,11 +40,7 @@ function submitPrompt(event) { document.getElementById("input").value = ""; const key = 
localStorage.getItem("key"); - if (input.startsWith("!img")) { - promptDallE(key, input.slice(4)); - } else { - promptGPT(key, input); - } + promptGPT(key, input); } diff --git a/core/http/static/general.css b/core/http/static/general.css index 40d67fb4..fd1161e8 100644 --- a/core/http/static/general.css +++ b/core/http/static/general.css @@ -71,3 +71,23 @@ body { flex-grow: 1; margin: 0.5rem; } + +ul { + list-style-type: disc; /* Adds bullet points */ + padding-left: 1.25rem; /* Indents the list from the left margin */ + margin-top: 1rem; /* Space above the list */ +} + +li { + font-size: 0.875rem; /* Small text size */ + color: #4a5568; /* Dark gray text */ + background-color: #f7fafc; /* Very light gray background */ + border-radius: 0.375rem; /* Rounded corners */ + padding: 0.5rem; /* Padding inside each list item */ + box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06); /* Subtle shadow */ + margin-bottom: 0.5rem; /* Vertical space between list items */ +} + +li:last-child { + margin-bottom: 0; /* Removes bottom margin from the last item */ +} diff --git a/core/http/views/chat.html b/core/http/views/chat.html index 1a14bbc3..eebf9083 100644 --- a/core/http/views/chat.html +++ b/core/http/views/chat.html @@ -106,7 +106,7 @@ SOFTWARE.
@@ -146,7 +170,7 @@ SOFTWARE. clear() { this.history.length = 0; }, - add(role, content) { + add(role, content, image) { const N = this.history.length - 1; if (this.history.length && this.history[N].role === role) { this.history[N].content += content; @@ -167,6 +191,7 @@ SOFTWARE. role: role, content: content, html: c, + image: image, }); } @@ -191,6 +216,7 @@ SOFTWARE. return { role: message.role, content: message.content, + image: message.image, }; }); }, diff --git a/core/http/views/index.html b/core/http/views/index.html index f8cae175..66de37fa 100644 --- a/core/http/views/index.html +++ b/core/http/views/index.html @@ -10,38 +10,76 @@
 Welcome to your LocalAI instance!
 The FOSS alternative to OpenAI, Claude, ...
 Documentation
+<!-- [remainder of this index.html hunk was garbled by tag stripping; the
+     recoverable changes: a new "Operations in progress" block, rendered
+     {{ if .ProcessingModels }}, iterates {{ range $key,$value:=.ProcessingModels }},
+     splits each key on "@" to show "{{$parts._1}} (from the '{{$parts._0}}'
+     repository)" and resolves the operation label from .TaskTypes; the
+     "Ouch! seems you don't have any models installed!" empty state is kept;
+     and the installed-model list ("We have {{len .ModelsConfig}} pre-loaded
+     models available.") becomes a table with "Model Name", "Backend" and
+     "Actions" columns, showing each entry's gallery icon with a no-image
+     placeholder fallback and "auto" when no backend is pinned] -->
diff --git a/core/http/views/models.html b/core/http/views/models.html index 17561594..fe5af2d5 100644 --- a/core/http/views/models.html +++ b/core/http/views/models.html @@ -63,8 +63,33 @@ {{ end }}
+<!-- [this models.html hunk was garbled by tag stripping; it adds the same
+     "Operations in progress" block as index.html, iterating
+     .ProcessingModels and resolving each operation's label from
+     .TaskTypes] -->
+ {{ end }} + + + Date: Wed, 8 May 2024 19:34:33 +0200 Subject: [PATCH 0481/2895] fix(ux): fix small glitches (#2265) also drop duplicates for displaying in-progress model ops Signed-off-by: Ettore Di Giacinto --- core/http/elements/gallery.go | 80 +++++++++++++----------- core/http/routes/ui.go | 29 +++++++-- core/http/views/chat.html | 3 +- core/http/views/index.html | 26 +------- core/http/views/models.html | 24 +------ core/http/views/partials/inprogress.html | 32 ++++++++++ 6 files changed, 104 insertions(+), 90 deletions(-) create mode 100644 core/http/views/partials/inprogress.html diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index 16a74553..7ca34aef 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -12,17 +12,20 @@ import ( ) const ( - NoImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg" + noImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg" ) func DoneProgress(galleryID, text string, showDelete bool) string { + var modelName = galleryID // Split by @ and grab the name if strings.Contains(galleryID, "@") { - galleryID = strings.Split(galleryID, "@")[1] + modelName = strings.Split(galleryID, "@")[1] } return elem.Div( - attrs.Props{}, + attrs.Props{ + "id": "action-div-" + dropBadChars(galleryID), + }, elem.H3( attrs.Props{ "role": "status", @@ -32,7 +35,7 @@ func DoneProgress(galleryID, text string, showDelete bool) string { }, elem.Text(text), ), - elem.If(showDelete, deleteButton(galleryID), reInstallButton(galleryID)), + elem.If(showDelete, deleteButton(galleryID, modelName), reInstallButton(galleryID)), ).Render() } @@ -77,7 +80,7 @@ func StartProgressBar(uid, progress, text string) string { attrs.Props{ "hx-trigger": "done", "hx-get": "/browse/job/" + uid, - "hx-swap": "innerHTML", + "hx-swap": "outerHTML", "hx-target": "this", }, elem.H3( @@ -88,7 +91,6 @@ func StartProgressBar(uid, progress, text string) string { "autofocus": "", }, elem.Text(text), - // This is a simple example of how to use the HTMLX library to create a progress bar that updates every 600ms. 
elem.Div(attrs.Props{ "hx-get": "/browse/job/progress/" + uid, "hx-trigger": "every 600ms", @@ -192,6 +194,7 @@ func reInstallButton(galleryName string) elem.Node { "data-twe-ripple-init": "", "data-twe-ripple-color": "light", "class": "float-right inline-block rounded bg-primary ml-2 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "hx-target": "#action-div-" + dropBadChars(galleryName), "hx-swap": "outerHTML", // post the Model ID as param "hx-post": "/browse/install/model/" + galleryName, @@ -205,16 +208,17 @@ func reInstallButton(galleryName string) elem.Node { ) } -func deleteButton(modelName string) elem.Node { +func deleteButton(galleryID, modelName string) elem.Node { return elem.Button( attrs.Props{ "data-twe-ripple-init": "", "data-twe-ripple-color": "light", "hx-confirm": "Are you sure you wish to delete the model?", "class": "float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "hx-target": "#action-div-" + dropBadChars(galleryID), "hx-swap": "outerHTML", // post the Model ID as param - "hx-post": "/browse/delete/model/" + modelName, + "hx-post": "/browse/delete/model/" + galleryID, }, elem.I( attrs.Props{ @@ -225,20 +229,14 @@ func deleteButton(modelName string) elem.Node { ) } +// Javascript/HTMX doesn't like weird IDs +func dropBadChars(s string) string { + return strings.ReplaceAll(s, "@", "__") +} + func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[string, string], galleryService *services.GalleryService) string { - //StartProgressBar(uid, "0") modelsElements := []elem.Node{} - // span := func(s string) elem.Node { - // return elem.Span( - // attrs.Props{ - // "class": "float-right inline-block bg-green-500 text-white py-1 px-3 rounded-full text-xs", - // }, - // elem.Text(s), - // ) - // } - descriptionDiv := func(m *gallery.GalleryModel) elem.Node { - return elem.Div( attrs.Props{ "class": "p-6 text-surface dark:text-white", @@ -261,13 +259,16 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri actionDiv := func(m *gallery.GalleryModel) elem.Node { galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name) currentlyProcessing := processing.Exists(galleryID) + jobID := "" isDeletionOp := false if currentlyProcessing { status := galleryService.GetStatus(galleryID) if status != nil && status.Deletion { isDeletionOp = true } - // if status == nil : "Waiting" + jobID = processing.Get(galleryID) + // TODO: + // case not handled, if status == nil : "Waiting" } nodes := []elem.Node{ @@ -317,29 +318,33 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri }, nodes..., ), - elem.If( - currentlyProcessing, - elem.Node( // If currently installing, show progress bar - elem.Raw(StartProgressBar(processing.Get(galleryID), "0", 
progressMessage)), - ), // Otherwise, show install button (if not installed) or display "Installed" - elem.If(m.Installed, - elem.Node(elem.Div( - attrs.Props{}, - reInstallButton(m.ID()), - deleteButton(m.Name), - )), - installButton(m.ID()), + elem.Div( + attrs.Props{ + "id": "action-div-" + dropBadChars(galleryID), + }, + elem.If( + currentlyProcessing, + elem.Node( // If currently installing, show progress bar + elem.Raw(StartProgressBar(jobID, "0", progressMessage)), + ), // Otherwise, show install button (if not installed) or display "Installed" + elem.If(m.Installed, + elem.Node(elem.Div( + attrs.Props{}, + reInstallButton(m.ID()), + deleteButton(m.ID(), m.Name), + )), + installButton(m.ID()), + ), ), ), ) } for _, m := range models { - elems := []elem.Node{} if m.Icon == "" { - m.Icon = NoImage + m.Icon = noImage } divProperties := attrs.Props{ @@ -347,7 +352,6 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri } elems = append(elems, - elem.Div(divProperties, elem.A(attrs.Props{ "href": "#!", @@ -359,8 +363,11 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri "src": m.Icon, }), ), - )) + ), + ) + // Special/corner case: if a model sets Trust Remote Code as required, show a warning + // TODO: handle this more generically later _, trustRemoteCodeExists := m.Overrides["trust_remote_code"] if trustRemoteCodeExists { elems = append(elems, elem.Div( @@ -392,7 +399,6 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri wrapper := elem.Div(attrs.Props{ "class": "dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark", - //"class": "block rounded-lg bg-white shadow-secondary-1 dark:bg-surface-dark", }, modelsElements...) return wrapper.Render() diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index d376d10e..8cbb4b28 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -14,6 +14,7 @@ import ( "github.com/go-skynet/LocalAI/pkg/gallery" "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/xsync" + "github.com/rs/zerolog/log" "github.com/gofiber/fiber/v2" "github.com/google/uuid" @@ -117,6 +118,7 @@ func RegisterUIRoutes(app *fiber.App, // https://htmx.org/examples/progress-bar/ app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error { galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! + log.Debug().Msgf("UI job submitted to install : %+v\n", galleryID) id, err := uuid.NewUUID() if err != nil { @@ -143,6 +145,14 @@ func RegisterUIRoutes(app *fiber.App, // https://htmx.org/examples/progress-bar/ app.Post("/browse/delete/model/:id", auth, func(c *fiber.Ctx) error { galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! 
+ log.Debug().Msgf("UI job submitted to delete : %+v\n", galleryID) + var galleryName = galleryID + if strings.Contains(galleryID, "@") { + // if the galleryID contains a @ it means that it's a model from a gallery + // but we want to delete it from the local models which does not need + // a repository ID + galleryName = strings.Split(galleryID, "@")[1] + } id, err := uuid.NewUUID() if err != nil { @@ -151,16 +161,20 @@ func RegisterUIRoutes(app *fiber.App, uid := id.String() + // Track the deletion job by galleryID and galleryName + // The GalleryID contains information about the repository, + // while the GalleryName is ONLY the name of the model + processingModels.Set(galleryName, uid) processingModels.Set(galleryID, uid) op := gallery.GalleryOp{ Id: uid, Delete: true, - GalleryModelName: galleryID, + GalleryModelName: galleryName, } go func() { galleryService.C <- op - cl.RemoveBackendConfig(galleryID) + cl.RemoveBackendConfig(galleryName) }() return c.SendString(elements.StartProgressBar(uid, "0", "Deletion")) @@ -170,7 +184,7 @@ func RegisterUIRoutes(app *fiber.App, // If the job is done, we trigger the /browse/job/:uid route // https://htmx.org/examples/progress-bar/ app.Get("/browse/job/progress/:uid", auth, func(c *fiber.Ctx) error { - jobUID := c.Params("uid") + jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests! status := galleryService.GetStatus(jobUID) if status == nil { @@ -192,17 +206,22 @@ func RegisterUIRoutes(app *fiber.App, // this route is hit when the job is done, and we display the // final state (for now just displays "Installation completed") app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error { + jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests! - status := galleryService.GetStatus(c.Params("uid")) + status := galleryService.GetStatus(jobUID) galleryID := "" for _, k := range processingModels.Keys() { - if processingModels.Get(k) == c.Params("uid") { + if processingModels.Get(k) == jobUID { galleryID = k processingModels.Delete(k) } } + if galleryID == "" { + log.Debug().Msgf("no processing model found for job : %+v\n", jobUID) + } + log.Debug().Msgf("JOB finished : %+v\n", status) showDelete := true displayText := "Installation completed" if status.Deletion { diff --git a/core/http/views/chat.html b/core/http/views/chat.html index 190cb877..7f13c7bd 100644 --- a/core/http/views/chat.html +++ b/core/http/views/chat.html @@ -113,7 +113,8 @@ SOFTWARE.
-                Start chatting with the AI by typing a prompt in the input field below.
+                Start chatting with the AI by typing a prompt in the input field below and pressing Enter.
+                For models that support images, you can upload an image by clicking the paperclip icon.