diff --git a/.env b/.env
index 86596105..b0d1a2ad 100644
--- a/.env
+++ b/.env
@@ -76,7 +76,7 @@
 
 ### Define a list of GRPC Servers for llama-cpp workers to distribute the load
 # https://github.com/ggerganov/llama.cpp/pull/6829
-# https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md
+# https://github.com/ggerganov/llama.cpp/blob/master/tools/rpc/README.md
 # LLAMACPP_GRPC_SERVERS=""
 
 ### Enable to run parallel requests
diff --git a/Makefile b/Makefile
index d126a390..008e0bdf 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ BINARY_NAME=local-ai
 DETECT_LIBS?=true
 
 # llama.cpp versions
-CPPLLAMA_VERSION?=d7a14c42a1883a34a6553cbfe30da1e1b84dfd6a
+CPPLLAMA_VERSION?=1d36b3670b285e69e58b9d687c770a2a0a192194
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
diff --git a/backend/cpp/llama/Makefile b/backend/cpp/llama/Makefile
index 21aea285..f4231720 100644
--- a/backend/cpp/llama/Makefile
+++ b/backend/cpp/llama/Makefile
@@ -59,8 +59,8 @@ llama.cpp:
 	git checkout -b build $(LLAMA_VERSION) && \
 	git submodule update --init --recursive --depth 1 --single-branch
 
-llama.cpp/examples/grpc-server: llama.cpp
-	mkdir -p llama.cpp/examples/grpc-server
+llama.cpp/tools/grpc-server: llama.cpp
+	mkdir -p llama.cpp/tools/grpc-server
 	bash prepare.sh
 
 rebuild:
@@ -70,13 +70,13 @@ rebuild:
 
 purge:
 	rm -rf llama.cpp/build
-	rm -rf llama.cpp/examples/grpc-server
+	rm -rf llama.cpp/tools/grpc-server
 	rm -rf grpc-server
 
 clean: purge
 	rm -rf llama.cpp
 
-grpc-server: llama.cpp llama.cpp/examples/grpc-server
+grpc-server: llama.cpp llama.cpp/tools/grpc-server
 	@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
 ifneq (,$(findstring sycl,$(BUILD_TYPE)))
 	+bash -c "source $(ONEAPI_VARS); \
diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp
index fb5dd343..a3279654 100644
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -52,7 +52,7 @@ struct server_params
 {
     std::string hostname = "127.0.0.1";
     std::vector<std::string> api_keys;
-    std::string public_path = "examples/server/public";
+    std::string public_path = "tools/server/public";
     std::string chat_template = "";
     int32_t port = 8080;
     int32_t read_timeout = 600;
diff --git a/backend/cpp/llama/patches/01-llava.patch b/backend/cpp/llama/patches/01-llava.patch
index 77124628..6e2abde2 100644
--- a/backend/cpp/llama/patches/01-llava.patch
+++ b/backend/cpp/llama/patches/01-llava.patch
@@ -1,7 +1,7 @@
-diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
+diff --git a/tools/llava/clip.cpp b/tools/llava/clip.cpp
 index 3cd0d2fa..6c5e811a 100644
---- a/examples/llava/clip.cpp
-+++ b/examples/llava/clip.cpp
+--- a/tools/llava/clip.cpp
++++ b/tools/llava/clip.cpp
 @@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
      struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
      int* patches_data = (int*)malloc(ggml_nbytes(patches));
diff --git a/backend/cpp/llama/prepare.sh b/backend/cpp/llama/prepare.sh
index eabd93c5..f332bc48 100644
--- a/backend/cpp/llama/prepare.sh
+++ b/backend/cpp/llama/prepare.sh
@@ -7,22 +7,22 @@ for patch in $(ls patches); do
     patch -d llama.cpp/ -p1 < patches/$patch
 done
 
-cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
-cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
-cp -rfv json.hpp llama.cpp/examples/grpc-server/
-cp -rfv utils.hpp llama.cpp/examples/grpc-server/
+cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
+cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
+cp -rfv json.hpp llama.cpp/tools/grpc-server/
+cp -rfv utils.hpp llama.cpp/tools/grpc-server/
 
-if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
+if grep -q "grpc-server" llama.cpp/tools/CMakeLists.txt; then
     echo "grpc-server already added"
 else
-    echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
+    echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
 fi
 
 ## XXX: In some versions of CMake clip wasn't being built before llama.
 ## This is an hack for now, but it should be fixed in the future.
-cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
-cp -rfv llama.cpp/examples/llava/clip-impl.h llama.cpp/examples/grpc-server/clip-impl.h
-cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
-echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
-cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
-cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
\ No newline at end of file
+cp -rfv llama.cpp/tools/llava/clip.h llama.cpp/tools/grpc-server/clip.h
+cp -rfv llama.cpp/tools/llava/clip-impl.h llama.cpp/tools/grpc-server/clip-impl.h
+cp -rfv llama.cpp/tools/llava/llava.cpp llama.cpp/tools/grpc-server/llava.cpp
+echo '#include "llama.h"' > llama.cpp/tools/grpc-server/llava.h
+cat llama.cpp/tools/llava/llava.h >> llama.cpp/tools/grpc-server/llava.h
+cp -rfv llama.cpp/tools/llava/clip.cpp llama.cpp/tools/grpc-server/clip.cpp
\ No newline at end of file
diff --git a/backend/cpp/llama/utils.hpp b/backend/cpp/llama/utils.hpp
index 198b6f26..0816ef56 100644
--- a/backend/cpp/llama/utils.hpp
+++ b/backend/cpp/llama/utils.hpp
@@ -1,4 +1,4 @@
-// https://github.com/ggerganov/llama.cpp/blob/master/examples/server/utils.hpp
+// https://github.com/ggerganov/llama.cpp/blob/master/tools/server/utils.hpp
 
 #pragma once
 