feat(llama.cpp): estimate vram usage (#5299)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 02:24:59 +00:00 · 2025-05-02 17:40:26 +02:00 · 2025-05-02 17:40:26 +02:00 · 5c6cd50ed6
commit 5c6cd50ed6
parent bace6516f1
7 changed files with 131 additions and 21 deletions
--- a/pkg/xsysinfo/gguf.go
+++ b/pkg/xsysinfo/gguf.go
@ -0,0 +1,52 @@
+package xsysinfo
+
+import (
+	"errors"
+
+	gguf "github.com/gpustack/gguf-parser-go"
+)
+
+// VRAMEstimate is the result returned by EstimateGGUFVRAMUsage.
+//
+// Fields, as populated by EstimateGGUFVRAMUsage:
+//   - TotalVRAM and AvailableVRAM both carry the availableVRAM argument
+//     passed by the caller (presumably in bytes; confirm against callers).
+//   - ModelSize is the size reported by the GGUF file metadata.
+//   - EstimatedLayers is the number of model layers estimated to fit in
+//     the available VRAM.
+//   - EstimatedVRAM is EstimatedLayers multiplied by the estimated
+//     per-layer size.
+//   - IsFullOffload is the full-offload flag computed by the estimator.
+type VRAMEstimate struct {
+	TotalVRAM       uint64
+	AvailableVRAM   uint64
+	ModelSize       uint64
+	EstimatedLayers int
+	EstimatedVRAM   uint64
+	IsFullOffload   bool
+}
+
+// EstimateGGUFVRAMUsage estimates how many layers of the GGUF model f fit
+// into availableVRAM and how much VRAM those layers would take.
+//
+// The estimate is deliberately coarse: every layer is assumed to take an
+// equal share of the model size (model size divided by the block count).
+// The returned EstimatedLayers is clamped to the model's block count, and
+// IsFullOffload is true exactly when every block fits in availableVRAM.
+//
+// An error is returned when f is nil, when the architecture reports a block
+// count of 0, or when the model size is too small to derive a non-zero
+// per-layer size (which would otherwise cause a division by zero).
+func EstimateGGUFVRAMUsage(f *gguf.GGUFFile, availableVRAM uint64) (*VRAMEstimate, error) {
+	if f == nil {
+		return nil, errors.New("gguf file is nil")
+	}
+
+	m := f.Metadata()
+	a := f.Architecture()
+
+	modelSize := uint64(m.Size)
+	blockCount := uint64(a.BlockCount)
+
+	if blockCount == 0 {
+		return nil, errors.New("block count is 0")
+	}
+
+	// Approximate the size of a single layer as an equal share of the model.
+	layerSize := modelSize / blockCount
+	if layerSize == 0 {
+		// Integer division truncated to 0 (model size < block count);
+		// dividing by it below would panic.
+		return nil, errors.New("model size is too small to derive a per-layer size")
+	}
+
+	// Number of layers that fit, never more than the model actually has.
+	fitting := availableVRAM / layerSize
+	if fitting > blockCount {
+		fitting = blockCount
+	}
+
+	// Full offload means every block of the model fits on the GPU.
+	isFullOffload := fitting == blockCount
+
+	return &VRAMEstimate{
+		TotalVRAM:       availableVRAM,
+		AvailableVRAM:   availableVRAM,
+		ModelSize:       modelSize,
+		EstimatedLayers: int(fitting),
+		EstimatedVRAM:   fitting * layerSize,
+		IsFullOffload:   isFullOffload,
+	}, nil
+}
--- a/pkg/xsysinfo/gpu.go
+++ b/pkg/xsysinfo/gpu.go
@ -16,6 +16,22 @@ func GPUs() ([]*gpu.GraphicsCard, error) {
 	return gpu.GraphicsCards, nil
 }

+// TotalAvailableVRAM returns the sum of Node.Memory.TotalUsableBytes over
+// all graphics cards reported by GPUs.
+//
+// Cards without node or memory information (nil card, nil Node or nil
+// Node.Memory) and cards reporting a non-positive amount are skipped, so a
+// result of 0 with a nil error means no usable figure could be determined.
+// Any error from GPUs is returned unchanged.
+//
+// NOTE(review): the value is taken from the memory area of the node the card
+// is attached to; confirm that this reflects dedicated VRAM on the target
+// platforms.
+func TotalAvailableVRAM() (uint64, error) {
+	gpus, err := GPUs()
+	if err != nil {
+		return 0, err
+	}
+
+	var totalVRAM uint64
+	// The loop variable is named card so it does not shadow the gpu package.
+	for _, card := range gpus {
+		// Node and Node.Memory are pointers that may be nil when the
+		// topology is unknown; dereferencing them blindly would panic.
+		if card == nil || card.Node == nil || card.Node.Memory == nil {
+			continue
+		}
+		if usable := card.Node.Memory.TotalUsableBytes; usable > 0 {
+			totalVRAM += uint64(usable)
+		}
+	}
+
+	return totalVRAM, nil
+}
+
 func HasGPU(vendor string) bool {
 	gpus, err := GPUs()
 	if err != nil {