chore: detect and enable avx512 builds (#4675)

chore(avx512): add support

Fixes https://github.com/mudler/LocalAI/issues/4662

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Ettore Di Giacinto, 2025-01-24 08:26:44 +01:00 (committed by GitHub)
commit 5177837ab0, parent f9e368b7c4
3 changed files with 17 additions and 1 deletion


@@ -303,7 +303,7 @@ RUN make prepare
 ## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
 ## (both will use CUDA or hipblas for the actual computation)
 RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
-    SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
+    SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
     else \
     make build; \
     fi
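
The skip list above is what keeps GPU images lean: for cublas/hipblas builds the CPU-optimized llama.cpp variants are not compiled, since the actual computation happens through CUDA or hipblas, so the new avx512 variant has to be added to the skip list as well. As a rough illustration only (this is a hypothetical Go sketch of the filtering idea, not how the Makefile actually implements SKIP_GRPC_BACKEND):

// Hypothetical sketch: given the full backend list and a space-separated
// skip list, keep only the backends that were not skipped.
package main

import (
	"fmt"
	"strings"
)

func filterBackends(all []string, skip string) []string {
	skipped := make(map[string]bool)
	for _, s := range strings.Fields(skip) {
		skipped[s] = true
	}
	var kept []string
	for _, b := range all {
		if !skipped[b] {
			kept = append(kept, b)
		}
	}
	return kept
}

func main() {
	all := []string{
		"backend-assets/grpc/llama-cpp-avx",
		"backend-assets/grpc/llama-cpp-avx2",
		"backend-assets/grpc/llama-cpp-avx512",
		"backend-assets/grpc/llama-cpp-fallback",
	}
	// Same skip list the cublas/hipblas branch now passes to `make build`.
	skip := "backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2"
	fmt.Println(filterBackends(all, skip)) // only the fallback remains
}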


@@ -186,6 +186,7 @@ endif
 ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
@@ -699,6 +700,13 @@ backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.cpp
 	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
 
+backend-assets/grpc/llama-cpp-avx512: backend-assets/grpc backend/cpp/llama/llama.cpp
+	cp -rf backend/cpp/llama backend/cpp/llama-avx512
+	$(MAKE) -C backend/cpp/llama-avx512 purge
+	$(info ${GREEN}I llama-cpp build info:avx512${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx512" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-avx512/grpc-server backend-assets/grpc/llama-cpp-avx512
+
 backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
 	cp -rf backend/cpp/llama backend/cpp/llama-avx
 	$(MAKE) -C backend/cpp/llama-avx purge
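
The new Makefile target follows the existing per-variant pattern: copy the llama.cpp sources into backend/cpp/llama-avx512, purge stale artifacts, build the gRPC server with -DGGML_AVX512=on (and AVX2 off), and copy the resulting grpc-server binary into backend-assets. Whether that binary runs on a given host depends on the CPU exposing the AVX-512 foundation instructions; the sketch below is a small standalone check (not part of LocalAI) using github.com/klauspost/cpuid/v2, the same library the loader change below consults via cpuid.AVX512F:

// A minimal sketch, assuming github.com/klauspost/cpuid/v2. It only reports
// the host CPU's capabilities relevant to the llama-cpp variants.
package main

import (
	"fmt"

	"github.com/klauspost/cpuid/v2"
)

func main() {
	fmt.Println("CPU:     ", cpuid.CPU.BrandName)
	fmt.Println("AVX:     ", cpuid.CPU.Supports(cpuid.AVX))
	fmt.Println("AVX2:    ", cpuid.CPU.Supports(cpuid.AVX2))
	// AVX512F is the foundation subset the new build variant is gated on.
	fmt.Println("AVX512F: ", cpuid.CPU.Supports(cpuid.AVX512F))
}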


@@ -48,6 +48,7 @@ const (
 	LLamaCPP         = "llama-cpp"
 	LLamaCPPAVX2     = "llama-cpp-avx2"
+	LLamaCPPAVX512   = "llama-cpp-avx512"
 	LLamaCPPAVX      = "llama-cpp-avx"
 	LLamaCPPFallback = "llama-cpp-fallback"
 	LLamaCPPCUDA     = "llama-cpp-cuda"
@@ -68,6 +69,7 @@ const (
 var llamaCPPVariants = []string{
 	LLamaCPPAVX2,
+	LLamaCPPAVX512,
 	LLamaCPPAVX,
 	LLamaCPPFallback,
 	LLamaCPPCUDA,
@@ -268,6 +270,12 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) string {
 			log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
 			selectedProcess = p
 		}
+	} else if xsysinfo.HasCPUCaps(cpuid.AVX512F) {
+		p := backendPath(assetDir, LLamaCPPAVX512)
+		if _, err := os.Stat(p); err == nil {
+			log.Info().Msgf("[%s] attempting to load with AVX512 variant", backend)
+			selectedProcess = p
+		}
 	} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
 		p := backendPath(assetDir, LLamaCPPAVX)
 		if _, err := os.Stat(p); err == nil {
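
With this change the loader's precedence is: AVX2 first, then AVX-512, then plain AVX, then the fallback build, and a variant is only selected if its binary is actually present in the asset directory (the os.Stat check). Below is a condensed, self-contained sketch of that precedence, assuming github.com/klauspost/cpuid/v2 and a hypothetical pickLlamaVariant helper that skips the file-existence check:

// Hypothetical sketch of the variant precedence used by
// selectGRPCProcessByHostCapabilities; it inspects CPU flags only and does
// not verify that the corresponding backend-assets binary exists.
package main

import (
	"fmt"

	"github.com/klauspost/cpuid/v2"
)

func pickLlamaVariant() string {
	switch {
	case cpuid.CPU.Supports(cpuid.AVX2):
		return "llama-cpp-avx2"
	case cpuid.CPU.Supports(cpuid.AVX512F):
		return "llama-cpp-avx512"
	case cpuid.CPU.Supports(cpuid.AVX):
		return "llama-cpp-avx"
	default:
		return "llama-cpp-fallback"
	}
}

func main() {
	fmt.Println("would select:", pickLlamaVariant())
}

Because AVX2 is tested before AVX-512, hosts that support both keep loading the AVX2 build as before; the llama-cpp-avx512 backend is auto-selected only when AVX512F is reported without AVX2, though it is now built and shipped for all CPU builds via the Makefile change above.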