From b82577d6423b2c18a8d14b93d05b4f484a80a197 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 15 Oct 2024 09:41:53 +0200
Subject: [PATCH] fix(llama.cpp): consider also native builds (#3839)

This is in order to also identify builds which are not using
capability-based alternatives. For instance, there are cases where we
build the backend only natively on the host.

Signed-off-by: Ettore Di Giacinto
---
 pkg/model/initializers.go | 49 ++++++++++++++++++++++++++-------------
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index c3b37179..bd668ec2 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -251,8 +251,22 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
 
 	// No GPU found or no specific binaries found, try to load the CPU variant(s)
 
-	// Select the Fallback by default
-	selectedProcess := backendPath(assetDir, LLamaCPPFallback)
+	// Select a binary based on availability/capability
+	selectedProcess := ""
+
+	// Check if we have the fallback build (LLamaCPPFallback) and select it as the baseline
+	if _, err := os.Stat(backendPath(assetDir, LLamaCPPFallback)); err == nil {
+		log.Debug().Msgf("[%s] %s variant available", LLamaCPPFallback, backend)
+		selectedProcess = backendPath(assetDir, LLamaCPPFallback)
+	}
+
+	// Check if we have a native build (llama-cpp) and use that instead.
+	// As a reminder, we do ultimately attempt again with the fallback variant
+	// if things fail with what we select here
+	if _, err := os.Stat(backendPath(assetDir, LLamaCPP)); err == nil {
+		log.Debug().Msgf("[%s] attempting to load with native variant", backend)
+		selectedProcess = backendPath(assetDir, LLamaCPP)
+	}
 
 	// IF we find any optimized binary, we use that
 	if xsysinfo.HasCPUCaps(cpuid.AVX2) {
@@ -269,7 +283,7 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
 		}
 	}
 
-	// Check if the binary exists!
+	// Safety measure: check if the binary exists, otherwise return an empty string
 	if _, err := os.Stat(selectedProcess); err == nil {
 		return selectedProcess
 	}
@@ -277,6 +291,21 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
 	return ""
 }
 
+func attemptLoadingOnFailure(backend string, ml *ModelLoader, o *Options, err error) (*Model, error) {
+	// XXX: This is too backend specific (llama-cpp), remove this bit or generalize further.
+	// We failed somehow starting the binary. For instance, could be that we are missing
+	// some libraries if running in binary-only mode.
+	// In this case, we attempt to load the model with the fallback variant.
+
+	// If not llama-cpp backend, return the error immediately
+	if backend != LLamaCPP {
+		return nil, err
+	}
+
+	log.Error().Msgf("[%s] Failed loading model, trying with fallback '%s', error: %s", backend, LLamaCPPFallback, err.Error())
+	return ml.LoadModel(o.modelID, o.model, ml.grpcModel(LLamaCPPFallback, false, o))
+}
+
 // starts the grpcModelProcess for the backend, and returns a grpc client
 // It also loads the model
 func (ml *ModelLoader) grpcModel(backend string, autodetect bool, o *Options) func(string, string, string) (*Model, error) {
@@ -450,19 +479,7 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e
 
 	model, err := ml.LoadModel(o.modelID, o.model, ml.grpcModel(backendToConsume, AutoDetect, o))
 	if err != nil {
-		// XXX: This is too backend specific(llama-cpp), remove this bit or generalize further
-		// We failed somehow starting the binary. For instance, could be that we are missing
-		// some libraries if running in binary-only mode.
-		// In this case, we attempt to load the model with the fallback variant.
-
-		// If not llama-cpp backend, return error immediately
-		if backend != LLamaCPP {
-			return nil, err
-		}
-
-		// Otherwise attempt with fallback
-		log.Error().Msgf("[%s] Failed loading model, trying with fallback '%s'", backend, LLamaCPPFallback)
-		model, err = ml.LoadModel(o.modelID, o.model, ml.grpcModel(LLamaCPPFallback, false, o))
+		model, err = attemptLoadingOnFailure(backend, ml, o, err)
 		if err != nil {
 			return nil, err
 		}
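
For illustration, below is a minimal, standalone Go sketch of the selection order the first hunk introduces: the generic fallback is picked up first, a native build overrides it when present, a CPU-optimized variant overrides both, and a final stat guards against returning a missing binary. This is not the project's code: the variant names, the asset directory, and the exists/selectVariant helpers are assumptions standing in for backendPath, the LLamaCPP* constants, and xsysinfo.HasCPUCaps(cpuid.AVX2) used in initializers.go.

package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// Assumed variant names, standing in for the real LLamaCPP* constants.
const (
	llamaCPPNative   = "llama-cpp"
	llamaCPPFallback = "llama-cpp-fallback"
	llamaCPPAVX2     = "llama-cpp-avx2"
)

// exists reports whether a backend binary is present on disk,
// mirroring the os.Stat checks in the patch.
func exists(path string) bool {
	_, err := os.Stat(path)
	return err == nil
}

// selectVariant mimics the precedence added by the patch: fallback if
// present, native build preferred over it, CPU-optimized variant
// preferred over both, then a final existence check.
func selectVariant(assetDir string, hasAVX2 bool) string {
	selected := ""
	if exists(filepath.Join(assetDir, llamaCPPFallback)) {
		selected = filepath.Join(assetDir, llamaCPPFallback)
	}
	if exists(filepath.Join(assetDir, llamaCPPNative)) {
		selected = filepath.Join(assetDir, llamaCPPNative)
	}
	if hasAVX2 {
		// The patch assigns the optimized variant purely on CPU
		// capability; the final stat below catches a missing binary.
		selected = filepath.Join(assetDir, llamaCPPAVX2)
	}
	if exists(selected) {
		return selected
	}
	return "" // caller decides what to do when nothing usable is found
}

func main() {
	// Hypothetical asset directory; adjust to wherever the gRPC
	// backend binaries are extracted.
	fmt.Println(selectVariant("./backend-assets/grpc", true))
}

The behavioural change shows up in the ordering: before the patch the fallback was the unconditional default, whereas now a host where only the native build exists (no capability-specific variants shipped) gets that native binary selected directly.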