chore: detect and enable avx512 builds (#4675)
chore(avx512): add support

Fixes https://github.com/mudler/LocalAI/issues/4662

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
parent f9e368b7c4
commit 5177837ab0
3 changed files with 17 additions and 1 deletion
Dockerfile

@@ -303,7 +303,7 @@ RUN make prepare
 ## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
 ## (both will use CUDA or hipblas for the actual computation)
 RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
-        SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
+        SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
     else \
         make build; \
     fi

Makefile (8 changes)
@@ -186,6 +186,7 @@ endif
 ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc

@@ -699,6 +700,13 @@ backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.cpp
 	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2

+backend-assets/grpc/llama-cpp-avx512: backend-assets/grpc backend/cpp/llama/llama.cpp
+	cp -rf backend/cpp/llama backend/cpp/llama-avx512
+	$(MAKE) -C backend/cpp/llama-avx512 purge
+	$(info ${GREEN}I llama-cpp build info:avx512${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx512" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-avx512/grpc-server backend-assets/grpc/llama-cpp-avx512
+
 backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
 	cp -rf backend/cpp/llama backend/cpp/llama-avx
 	$(MAKE) -C backend/cpp/llama-avx purge
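
The new Makefile target builds a dedicated llama.cpp gRPC server with -DGGML_AVX512=on, but whether that binary is ever picked at runtime depends on the host CPU advertising the AVX-512 Foundation (AVX512F) feature. A minimal sketch of that check, assuming the github.com/klauspost/cpuid/v2 package (the cpuid identifier the Go changes below rely on):

package main

import (
	"fmt"

	"github.com/klauspost/cpuid/v2"
)

func main() {
	// GGML's AVX-512 code path needs at least the AVX-512 Foundation subset.
	if cpuid.CPU.Supports(cpuid.AVX512F) {
		fmt.Println("AVX-512F available: the llama-cpp-avx512 variant can be used")
	} else {
		fmt.Println("no AVX-512F: the AVX2/AVX/fallback variants apply")
	}
}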

@@ -48,6 +48,7 @@ const (
 	LLamaCPP = "llama-cpp"

 	LLamaCPPAVX2 = "llama-cpp-avx2"
+	LLamaCPPAVX512 = "llama-cpp-avx512"
 	LLamaCPPAVX = "llama-cpp-avx"
 	LLamaCPPFallback = "llama-cpp-fallback"
 	LLamaCPPCUDA = "llama-cpp-cuda"

@@ -68,6 +69,7 @@ const (

 var llamaCPPVariants = []string{
 	LLamaCPPAVX2,
+	LLamaCPPAVX512,
 	LLamaCPPAVX,
 	LLamaCPPFallback,
 	LLamaCPPCUDA,

@@ -268,6 +270,12 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) string
 			log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
 			selectedProcess = p
 		}
+	} else if xsysinfo.HasCPUCaps(cpuid.AVX512F) {
+		p := backendPath(assetDir, LLamaCPPAVX512)
+		if _, err := os.Stat(p); err == nil {
+			log.Info().Msgf("[%s] attempting to load with AVX512 variant", backend)
+			selectedProcess = p
+		}
 	} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
 		p := backendPath(assetDir, LLamaCPPAVX)
 		if _, err := os.Stat(p); err == nil {
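
With the constant, the variant list, and the selection branch in place, selectGRPCProcessByHostCapabilities prefers AVX2, then AVX-512, then plain AVX, and only keeps a variant whose grpc-server binary is actually present in the asset directory; because AVX2 is tested first, a CPU that exposes both AVX2 and AVX-512 still gets the AVX2 build. A condensed, self-contained sketch of that order (backendPath and fileExists are stand-ins for the project's helpers, and cpuid.CPU.Supports stands in for xsysinfo.HasCPUCaps):

package main

import (
	"fmt"
	"os"
	"path/filepath"

	"github.com/klauspost/cpuid/v2"
)

const (
	llamaCPPAVX2     = "llama-cpp-avx2"
	llamaCPPAVX512   = "llama-cpp-avx512"
	llamaCPPAVX      = "llama-cpp-avx"
	llamaCPPFallback = "llama-cpp-fallback"
)

// backendPath stands in for the helper that joins the asset directory
// with a backend name.
func backendPath(assetDir, backend string) string {
	return filepath.Join(assetDir, "backend-assets", "grpc", backend)
}

func fileExists(p string) bool {
	_, err := os.Stat(p)
	return err == nil
}

// selectCPUVariant mirrors the patched if/else-if order: AVX2 first,
// then AVX-512, then AVX; the generic fallback is returned when the
// chosen tier's binary is missing or no feature matches.
func selectCPUVariant(assetDir string) string {
	selected := backendPath(assetDir, llamaCPPFallback)
	switch {
	case cpuid.CPU.Supports(cpuid.AVX2):
		if p := backendPath(assetDir, llamaCPPAVX2); fileExists(p) {
			selected = p
		}
	case cpuid.CPU.Supports(cpuid.AVX512F):
		if p := backendPath(assetDir, llamaCPPAVX512); fileExists(p) {
			selected = p
		}
	case cpuid.CPU.Supports(cpuid.AVX):
		if p := backendPath(assetDir, llamaCPPAVX); fileExists(p) {
			selected = p
		}
	}
	return selected
}

func main() {
	fmt.Println(selectCPUVariant("/tmp/localai"))
}

On a host that supports both AVX2 and AVX-512, this sketch returns the llama-cpp-avx2 path, matching the precedence in the diff above.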