diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 48aebfb7..c0e1c051 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -71,7 +71,7 @@ jobs:
       run: |
         sudo apt-get update
         sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
-        sudo apt-get install -y libgmock-dev
+        sudo apt-get install -y libgmock-dev clang
        curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
        sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
        gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
diff --git a/Makefile b/Makefile
index 7917b119..e7987701 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ BINARY_NAME=local-ai
 DETECT_LIBS?=true
 
 # llama.cpp versions
-CPPLLAMA_VERSION?=ecda2ec4b347031a9b8a89ee2efc664ce63f599c
+CPPLLAMA_VERSION?=295354ea6848a77bdee204ee1c971d9b92ffcca9
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
diff --git a/backend/python/bark/requirements.txt b/backend/python/bark/requirements.txt
index 2f40b320..54b47b8e 100644
--- a/backend/python/bark/requirements.txt
+++ b/backend/python/bark/requirements.txt
@@ -1,4 +1,4 @@
 bark==0.1.5
-grpcio==1.71.0
+grpcio==1.72.0
 protobuf
 certifi
\ No newline at end of file
diff --git a/backend/python/common/template/requirements.txt b/backend/python/common/template/requirements.txt
index e4d843df..16574534 100644
--- a/backend/python/common/template/requirements.txt
+++ b/backend/python/common/template/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.71.0
+grpcio==1.72.0
 protobuf
 grpcio-tools
\ No newline at end of file
diff --git a/backend/python/coqui/requirements.txt b/backend/python/coqui/requirements.txt
index 108d30ba..3c6b59b1 100644
--- a/backend/python/coqui/requirements.txt
+++ b/backend/python/coqui/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.71.0
+grpcio==1.72.0
 protobuf
 certifi
 packaging==24.1
\ No newline at end of file
diff --git a/backend/python/diffusers/requirements.txt b/backend/python/diffusers/requirements.txt
index 8cfe88a7..d5af9b8f 100644
--- a/backend/python/diffusers/requirements.txt
+++ b/backend/python/diffusers/requirements.txt
@@ -1,5 +1,5 @@
 setuptools
-grpcio==1.71.0
+grpcio==1.72.0
 pillow
 protobuf
 certifi
diff --git a/backend/python/exllama2/requirements.txt b/backend/python/exllama2/requirements.txt
index ed8ffec4..c9c8b8e9 100644
--- a/backend/python/exllama2/requirements.txt
+++ b/backend/python/exllama2/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.71.0
+grpcio==1.72.0
 protobuf
 certifi
 wheel
diff --git a/backend/python/faster-whisper/requirements.txt b/backend/python/faster-whisper/requirements.txt
index e4d843df..16574534 100644
--- a/backend/python/faster-whisper/requirements.txt
+++ b/backend/python/faster-whisper/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.71.0
+grpcio==1.72.0
 protobuf
 grpcio-tools
\ No newline at end of file
diff --git a/backend/python/kokoro/requirements.txt b/backend/python/kokoro/requirements.txt
index cf0f0143..4a1e1f49 100644
--- a/backend/python/kokoro/requirements.txt
+++ b/backend/python/kokoro/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.71.0
+grpcio==1.72.0
 protobuf
 phonemizer
 scipy
diff --git a/backend/python/rerankers/requirements.txt b/backend/python/rerankers/requirements.txt
index 931cb146..8277a7c1 100644
--- a/backend/python/rerankers/requirements.txt
+++ b/backend/python/rerankers/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.71.0
+grpcio==1.72.0
 protobuf
 certifi
\ No newline at end of file
diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt
index 9ba6c861..ce8bfd6c 100644
--- a/backend/python/transformers/requirements.txt
+++ b/backend/python/transformers/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.71.0
+grpcio==1.72.0
 protobuf
 certifi
 setuptools
diff --git a/backend/python/vllm/requirements.txt b/backend/python/vllm/requirements.txt
index f1771cc4..379c87e0 100644
--- a/backend/python/vllm/requirements.txt
+++ b/backend/python/vllm/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.71.0
+grpcio==1.72.0
 protobuf
 certifi
 setuptools
\ No newline at end of file
diff --git a/core/config/guesser.go b/core/config/guesser.go
index b7fb23de..e66df70d 100644
--- a/core/config/guesser.go
+++ b/core/config/guesser.go
@@ -4,6 +4,7 @@ import (
 	"os"
 	"path/filepath"
 
+	"github.com/mudler/LocalAI/pkg/xsysinfo"
 	"github.com/rs/zerolog/log"
 	gguf "github.com/thxcode/gguf-parser-go"
 )
@@ -35,4 +36,10 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string, defaultCtx int)
 		}
 		cfg.ContextSize = &defaultCtx
 	}
+
+	if cfg.Options == nil {
+		if xsysinfo.HasGPU("nvidia") || xsysinfo.HasGPU("amd") {
+			cfg.Options = []string{"gpu"}
+		}
+	}
 }
diff --git a/gallery/index.yaml b/gallery/index.yaml
index 66db118b..8d07009e 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -556,6 +556,21 @@
     - filename: amoral-gemma3-1B-v2.Q4_K_M.gguf
       sha256: 7f2167d91409cabaf0a42e41e833a6ca055c841a37d8d829e11db81fdaed5e4c
       uri: huggingface://mradermacher/amoral-gemma3-1B-v2-GGUF/amoral-gemma3-1B-v2.Q4_K_M.gguf
+- !!merge <<: *gemma3
+  name: "soob3123_veritas-12b"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/62f93f9477b722f1866398c2/IuhCq-5PcEbDBqXD5xnup.png
+  urls:
+    - https://huggingface.co/soob3123/Veritas-12B
+    - https://huggingface.co/bartowski/soob3123_Veritas-12B-GGUF
+  description: |
+    Veritas-12B emerges as a model forged in the pursuit of intellectual clarity and logical rigor. This 12B parameter model possesses superior philosophical reasoning capabilities and analytical depth, ideal for exploring complex ethical dilemmas, deconstructing arguments, and engaging in structured philosophical dialogue. Veritas-12B excels at articulating nuanced positions, identifying logical fallacies, and constructing coherent arguments grounded in reason. Expect discussions characterized by intellectual honesty, critical analysis, and a commitment to exploring ideas with precision.
+  overrides:
+    parameters:
+      model: soob3123_Veritas-12B-Q4_K_M.gguf
+  files:
+    - filename: soob3123_Veritas-12B-Q4_K_M.gguf
+      sha256: 41821d6b0dd2b81a5bddd843a5534fd64d95e75b8e9dc952340868af320d49a7
+      uri: huggingface://bartowski/soob3123_Veritas-12B-GGUF/soob3123_Veritas-12B-Q4_K_M.gguf
 - &llama4
   url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
   icon: https://avatars.githubusercontent.com/u/153379578
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index 1a7fdc9c..a0d0d5fc 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -181,10 +181,6 @@ func orderBackends(backends map[string][]string) ([]string, error) {
 // selectGRPCProcessByHostCapabilities selects the GRPC process to start based on system capabilities
 // Note: this is now relevant only for llama.cpp
 func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) string {
-	foundCUDA := false
-	foundAMDGPU := false
-	foundIntelGPU := false
-
 	var grpcProcess string
 	// Select backend now just for llama.cpp
 	if backend != LLamaCPP {
@@ -198,48 +194,24 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
 	}
 
 	// Check for GPU-binaries that are shipped with single binary releases
-	gpus, err := xsysinfo.GPUs()
-	if err == nil {
-		for _, gpu := range gpus {
-			if strings.Contains(gpu.String(), "nvidia") {
-				p := backendPath(assetDir, LLamaCPPCUDA)
-				if _, err := os.Stat(p); err == nil {
-					log.Info().Msgf("[%s] attempting to load with CUDA variant", backend)
-					grpcProcess = p
-					foundCUDA = true
-				} else {
-					log.Debug().Msgf("Nvidia GPU device found, no embedded CUDA variant found. You can ignore this message if you are using container with CUDA support")
-				}
-			}
-			if strings.Contains(gpu.String(), "amd") {
-				p := backendPath(assetDir, LLamaCPPHipblas)
-				if _, err := os.Stat(p); err == nil {
-					log.Info().Msgf("[%s] attempting to load with HIPBLAS variant", backend)
-					grpcProcess = p
-					foundAMDGPU = true
-				} else {
-					log.Debug().Msgf("AMD GPU device found, no embedded HIPBLAS variant found. You can ignore this message if you are using container with HIPBLAS support")
-				}
-			}
-			if strings.Contains(gpu.String(), "intel") {
-				backend := LLamaCPPSycl16
-				if !f16 {
-					backend = LLamaCPPSycl32
-				}
-				p := backendPath(assetDir, backend)
-				if _, err := os.Stat(p); err == nil {
-					log.Info().Msgf("[%s] attempting to load with Intel variant", backend)
-					grpcProcess = p
-					foundIntelGPU = true
-				} else {
-					log.Debug().Msgf("Intel GPU device found, no embedded SYCL variant found. You can ignore this message if you are using container with SYCL support")
-				}
-			}
-		}
+	gpuBinaries := map[string]string{
+		"nvidia": LLamaCPPCUDA,
+		"amd":    LLamaCPPHipblas,
+		"intel":  LLamaCPPSycl16,
 	}
-	if foundCUDA || foundAMDGPU || foundIntelGPU {
-		return grpcProcess
+	if !f16 {
+		gpuBinaries["intel"] = LLamaCPPSycl32
+	}
+
+	for vendor, binary := range gpuBinaries {
+		if xsysinfo.HasGPU(vendor) {
+			p := backendPath(assetDir, binary)
+			if _, err := os.Stat(p); err == nil {
+				log.Info().Msgf("[%s] attempting to load with %s variant (vendor: %s)", backend, binary, vendor)
+				return p
+			}
+		}
 	}
 
 	// No GPU found or no specific binaries found, try to load the CPU variant(s)
diff --git a/pkg/xsysinfo/gpu.go b/pkg/xsysinfo/gpu.go
index b6321e81..a692c775 100644
--- a/pkg/xsysinfo/gpu.go
+++ b/pkg/xsysinfo/gpu.go
@@ -1,6 +1,8 @@
 package xsysinfo
 
 import (
+	"strings"
+
 	"github.com/jaypipes/ghw"
 	"github.com/jaypipes/ghw/pkg/gpu"
 )
@@ -13,3 +15,19 @@ func GPUs() ([]*gpu.GraphicsCard, error) {
 
 	return gpu.GraphicsCards, nil
 }
+
+func HasGPU(vendor string) bool {
+	gpus, err := GPUs()
+	if err != nil {
+		return false
+	}
+	if vendor == "" {
+		return len(gpus) > 0
+	}
+	for _, gpu := range gpus {
+		if strings.Contains(gpu.String(), vendor) {
+			return true
+		}
+	}
+	return false
+}
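Note (reviewer sketch, not part of the diff): the new `xsysinfo.HasGPU` helper is what both `core/config/guesser.go` and the refactored `pkg/model/initializers.go` now rely on for vendor detection. Below is a minimal, hypothetical test showing how its contract could be exercised; the file name `gpu_test.go` and the assertions are assumptions, and the concrete results depend entirely on the GPUs that ghw enumerates on the host running the test.

```go
// pkg/xsysinfo/gpu_test.go (hypothetical) — sanity-checks the HasGPU contract:
// an empty vendor string means "any GPU at all", and a vendor-specific match
// implies that at least one GPU was enumerated.
package xsysinfo_test

import (
	"testing"

	"github.com/mudler/LocalAI/pkg/xsysinfo"
)

func TestHasGPUContract(t *testing.T) {
	// True iff ghw reports at least one graphics card on this machine.
	anyGPU := xsysinfo.HasGPU("")

	// A vendor-specific hit can only be true if some GPU exists at all.
	for _, vendor := range []string{"nvidia", "amd", "intel"} {
		if xsysinfo.HasGPU(vendor) && !anyGPU {
			t.Errorf("HasGPU(%q) returned true, but HasGPU(\"\") reports no GPUs", vendor)
		}
	}
}
```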