feat(llama.cpp): add distributed llama.cpp inferencing (#2324)
* feat(llama.cpp): support distributed llama.cpp
* feat: let tweak how chat messages are merged together
* refactor
* Makefile: register to ALL_GRPC_BACKENDS
* refactoring, allow disable auto-detection of backends
* minor fixups
* feat: add cmd to start rpc-server from llama.cpp
* ci: add ccache

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: mudler <mudler@localai.io>
Parent: 29909666c3
Commit: c89271b2e4
11 changed files with 222 additions and 82 deletions
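The changes below wire distributed inferencing into the llama.cpp backend: a LocalAI instance can offload llama.cpp computation to remote rpc-server workers. A minimal usage sketch follows; the LLAMACPP_GRPC_SERVERS variable name and the rpc-server flags are assumptions based on llama.cpp's RPC backend, not something shown in this diff.

    # On each worker node: start the llama.cpp RPC server that the build below
    # stages as backend-assets/util/llama-cpp-rpc-server (sketch).
    ./llama-cpp-rpc-server --host 0.0.0.0 --port 50052

    # On the main node: list the workers (assumed comma-separated host:port)
    # before starting LocalAI, then load models as usual.
    LLAMACPP_GRPC_SERVERS="192.168.1.10:50052,192.168.1.11:50052" ./local-ai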
Makefile | 17

diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
 
 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=dc685be46622a8fabfd57cfa804237c8f15679b8
+CPPLLAMA_VERSION?=4f0263633b40e94e8b69fd6e7e4395cfedfd5c12
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -158,6 +158,8 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
+ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
 ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
 ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
 ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
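Registering llama-cpp-grpc and the llama-cpp-rpc-server helper in ALL_GRPC_BACKENDS makes them part of the default build. As the build-minimal recipe further below suggests, the backend set can also be narrowed by overriding GRPC_BACKENDS; a hedged example, assuming the variable is honored the same way build-minimal uses it:

    # Build only the RPC-enabled llama.cpp backend and its helper binary (sketch).
    make GRPC_BACKENDS="backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" build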
@@ -314,7 +316,7 @@ build: prepare backend-assets grpcs ## Build the project
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
 
 build-minimal:
-	BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp" GO_TAGS=none $(MAKE) build
+	BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=none $(MAKE) build
 
 build-api:
 	BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
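build-minimal now pins an explicit CPU variant (llama-cpp-avx2) instead of the former generic llama-cpp target, in line with the avx/avx2/fallback variants listed in ALL_GRPC_BACKENDS above. Invocation stays the same:

    # Build the API plus a single llama.cpp backend (AVX2 variant only).
    make build-minimal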
@@ -691,6 +693,17 @@ backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
 	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
 
+backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
+	cp -rf backend/cpp/llama backend/cpp/llama-grpc
+	$(MAKE) -C backend/cpp/llama-grpc purge
+	$(info ${GREEN}I llama-cpp build info:grpc${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_RPC=ON -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
+
+backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
+	mkdir -p backend-assets/util/
+	cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
+
 backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
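The two new targets above build a copy of the llama.cpp backend with -DLLAMA_RPC=ON and stage llama.cpp's rpc-server binary under backend-assets/util/. They can also be built on their own; a sketch, assuming the default CMAKE_ARGS:

    # Builds backend-assets/grpc/llama-cpp-grpc first (its dependency), then copies
    # llama.cpp's rpc-server into backend-assets/util/llama-cpp-rpc-server.
    make backend-assets/util/llama-cpp-rpc-server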