LocalAI/pkg/xsysinfo/gguf.go
Ettore Di Giacinto 4909aa6750 feat: improve RAM estimation by using values from summary
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-29 22:42:58 +02:00

60 lines
1.4 KiB
Go

package xsysinfo
import (
gguf "github.com/gpustack/gguf-parser-go"
)
// VRAMEstimate is the result produced by EstimateGGUFVRAMUsage: how much
// video memory a GGUF model is expected to need and how much of the model
// can be offloaded given the memory that is available.
//
// NOTE(review): the memory fields are presumably expressed in bytes; confirm
// against the gguf-parser-go estimate types.
type VRAMEstimate struct {
	// TotalVRAM is filled with the availableVRAM value handed to
	// EstimateGGUFVRAMUsage (the same value as AvailableVRAM).
	TotalVRAM uint64
	// AvailableVRAM is the VRAM budget the estimate was computed against.
	AvailableVRAM uint64
	// ModelSize is the model size as reported by the GGUF metadata.
	ModelSize uint64
	// EstimatedLayers is the number of layers expected to fit in AvailableVRAM.
	EstimatedLayers int
	// EstimatedVRAM is the VRAM the model is estimated to require.
	EstimatedVRAM uint64
	// IsFullOffload reports whether AvailableVRAM is strictly greater than
	// EstimatedVRAM.
	IsFullOffload bool
}
// EstimateGGUFVRAMUsage estimates how much VRAM the GGUF model f needs and how
// many of its layers fit into availableVRAM.
//
// The required VRAM is the sum of the NonUMA values of every entry in the
// llama.cpp run estimate summary, and the layer count is taken from the
// summary's OffloadLayers. Both values are floored at 1 so they can be used as
// divisors. The per-layer cost is approximated as requiredVRAM / layers.
//
// f is expected to be non-nil; it is used without a nil check. availableVRAM
// must be in the same unit as the values reported by the estimate. The
// returned error is currently always nil.
func EstimateGGUFVRAMUsage(f *gguf.GGUFFile, availableVRAM uint64) (*VRAMEstimate, error) {
	metadata := f.Metadata()

	// NOTE(review): the meaning of the SummarizeItem arguments (true, 0, 0) is
	// defined by gguf-parser-go; confirm against its documentation.
	summary := f.EstimateLLaMACppRun().SummarizeItem(true, 0, 0)

	var estimatedVRAM uint64
	for _, vram := range summary.VRAMs {
		estimatedVRAM += uint64(vram.NonUMA)
	}
	if estimatedVRAM == 0 {
		estimatedVRAM = 1
	}

	totalLayers := summary.OffloadLayers
	if totalLayers == 0 {
		totalLayers = 1
	}

	fullOffload := availableVRAM > estimatedVRAM

	// When everything fits, all layers are offloaded and no division is needed.
	fitLayers := totalLayers
	if !fullOffload {
		// Integer division truncates, so the per-layer cost can round down to
		// zero when the estimate is smaller than the layer count; keep it at
		// least 1 to avoid a divide-by-zero panic.
		layerSize := estimatedVRAM / totalLayers
		if layerSize == 0 {
			layerSize = 1
		}
		fitLayers = availableVRAM / layerSize
		// The truncated per-layer cost underestimates the real one, so the
		// quotient may exceed the number of layers that actually exist.
		if fitLayers > totalLayers {
			fitLayers = totalLayers
		}
	}

	return &VRAMEstimate{
		TotalVRAM:       availableVRAM,
		AvailableVRAM:   availableVRAM,
		ModelSize:       uint64(metadata.Size),
		EstimatedLayers: int(fitLayers),
		EstimatedVRAM:   estimatedVRAM,
		IsFullOffload:   fullOffload,
	}, nil
}