This commit is contained in:
Ettore Di Giacinto 2025-06-01 06:28:27 +00:00 committed by GitHub
commit 942279a54d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,8 +1,6 @@
package xsysinfo package xsysinfo
import ( import (
"errors"
gguf "github.com/gpustack/gguf-parser-go" gguf "github.com/gpustack/gguf-parser-go"
) )
@ -18,28 +16,38 @@ type VRAMEstimate struct {
func EstimateGGUFVRAMUsage(f *gguf.GGUFFile, availableVRAM uint64) (*VRAMEstimate, error) { func EstimateGGUFVRAMUsage(f *gguf.GGUFFile, availableVRAM uint64) (*VRAMEstimate, error) {
// Get model metadata // Get model metadata
m := f.Metadata() m := f.Metadata()
a := f.Architecture()
estimate := f.EstimateLLaMACppRun()
lmes := estimate.SummarizeItem(true, 0, 0)
estimatedVRAM := uint64(0)
availableLayers := lmes.OffloadLayers // TODO: check if we can just use OffloadLayers here
for _, vram := range lmes.VRAMs {
estimatedVRAM += uint64(vram.NonUMA)
}
// Calculate base model size // Calculate base model size
modelSize := uint64(m.Size) modelSize := uint64(m.Size)
if a.BlockCount == 0 { if availableLayers == 0 {
return nil, errors.New("block count is 0") availableLayers = 1
}
if estimatedVRAM == 0 {
estimatedVRAM = 1
} }
// Estimate number of layers that can fit in VRAM // Estimate number of layers that can fit in VRAM
// Each layer typically requires about 1/32 of the model size // Each layer typically requires about 1/32 of the model size
layerSize := modelSize / uint64(a.BlockCount) layerSize := estimatedVRAM / availableLayers
estimatedLayers := int(availableVRAM / layerSize)
// If we can't fit even one layer, we need to do full offload estimatedLayers := int(availableVRAM / layerSize)
isFullOffload := estimatedLayers <= 0 if availableVRAM > estimatedVRAM {
if isFullOffload { estimatedLayers = int(availableLayers)
estimatedLayers = 0
} }
// Calculate estimated VRAM usage // Calculate estimated VRAM usage
estimatedVRAM := uint64(estimatedLayers) * layerSize
return &VRAMEstimate{ return &VRAMEstimate{
TotalVRAM: availableVRAM, TotalVRAM: availableVRAM,
@ -47,6 +55,6 @@ func EstimateGGUFVRAMUsage(f *gguf.GGUFFile, availableVRAM uint64) (*VRAMEstimat
ModelSize: modelSize, ModelSize: modelSize,
EstimatedLayers: estimatedLayers, EstimatedLayers: estimatedLayers,
EstimatedVRAM: estimatedVRAM, EstimatedVRAM: estimatedVRAM,
IsFullOffload: isFullOffload, IsFullOffload: availableVRAM > estimatedVRAM,
}, nil }, nil
} }