feat(llama.cpp): support embeddings endpoints (#2871)
* feat(llama.cpp): add embeddings

  Also enable embeddings by default for llama.cpp models

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(Makefile): prepare llama.cpp sources only once

  Otherwise we keep cloning llama.cpp for each of the variants

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* do not set embeddings to false

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* docs: add embeddings to the YAML config reference

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
commit 35561edb6e
parent 6564e7ea01
5 changed files with 44 additions and 12 deletions
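Since the commit also adds the new option to the YAML config reference, a model definition would enable it roughly as sketched below. This is an illustration, not the committed docs: `embeddings: true` is the field this change documents, while the model name, backend identifier, and file name are assumed placeholders following the usual LocalAI model-config shape.

# Illustrative LocalAI model config (field values are assumptions)
name: my-llama-model        # hypothetical model name
backend: llama-cpp          # the llama.cpp backend this commit touches
embeddings: true            # the option documented by this commit
parameters:
  model: ggml-model.gguf    # hypothetical model file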
@@ -2108,6 +2108,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
     data["grammar"] = predict->grammar();
     data["prompt"] = predict->prompt();
     data["ignore_eos"] = predict->ignoreeos();
+    data["embeddings"] = predict->embeddings();
 
     // for each image in the request, add the image data
     //
@@ -2385,6 +2386,31 @@ public:
         return grpc::Status::OK;
     }
 
+    /// https://github.com/ggerganov/llama.cpp/blob/aa2341298924ac89778252015efcb792f2df1e20/examples/server/server.cpp#L2969
+    grpc::Status Embedding(ServerContext* context, const backend::PredictOptions* request, backend::EmbeddingResult* embeddingResult) {
+        json data = parse_options(false, request, llama);
+        const int task_id = llama.queue_tasks.get_new_id();
+        llama.queue_results.add_waiting_task_id(task_id);
+        llama.request_completion(task_id, { {"prompt", data["embeddings"]}, { "n_predict", 0}, {"image_data", ""} }, false, true, -1);
+        // get the result
+        task_result result = llama.queue_results.recv(task_id);
+        //std::cout << "Embedding result JSON" << result.result_json.dump() << std::endl;
+        llama.queue_results.remove_waiting_task_id(task_id);
+        if (!result.error && result.stop) {
+            std::vector<float> embeddings = result.result_json.value("embedding", std::vector<float>());
+            // loop the vector and set the embeddings results
+            for (int i = 0; i < embeddings.size(); i++) {
+                embeddingResult->add_embeddings(embeddings[i]);
+            }
+        }
+        else
+        {
+            return grpc::Status::OK;
+        }
+
+        return grpc::Status::OK;
+    }
+
 };
 
 void RunServer(const std::string& server_address) {
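The new handler follows the task-queue pattern of the upstream llama.cpp server: it enqueues a completion task with n_predict set to 0, blocks on the result queue, and copies the "embedding" vector from the result JSON into the repeated field of the gRPC reply. Note that it returns grpc::Status::OK even when the task reports an error, so a caller can only detect failure by an empty embeddings list. A hypothetical client sketch (not part of this commit) invoking the RPC via stubs generated from LocalAI's backend.proto; the service name `Backend` and the field setters are assumptions, while the RPC name and message types come from the handler above:

// Hypothetical client for the Embedding RPC; assumes codegen from backend.proto.
#include <iostream>
#include <memory>
#include <grpcpp/grpcpp.h>
#include "backend.grpc.pb.h"  // assumed generated header

int main() {
    auto channel = grpc::CreateChannel("localhost:50051",
                                       grpc::InsecureChannelCredentials());
    std::unique_ptr<backend::Backend::Stub> stub = backend::Backend::NewStub(channel);

    backend::PredictOptions request;
    request.set_prompt("The quick brown fox");  // text to embed
    request.set_embeddings(true);               // mirrors predict->embeddings()

    backend::EmbeddingResult reply;
    grpc::ClientContext context;
    grpc::Status status = stub->Embedding(&context, request, &reply);

    if (status.ok()) {
        // An empty vector can also mean the task failed server-side,
        // since the handler above returns OK on error as well.
        std::cout << "received " << reply.embeddings_size()
                  << " embedding floats" << std::endl;
    } else {
        std::cerr << "RPC failed: " << status.error_message() << std::endl;
    }
    return 0;
}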