package xsysinfo

import (
	gguf "github.com/gpustack/gguf-parser-go"
)
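
// VRAMEstimate describes how a GGUF model is expected to map onto GPU memory:
// the VRAM budget, the model size on disk, how many layers are expected to be
// offloaded, and whether the whole model fits.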
type VRAMEstimate struct {
	TotalVRAM       uint64
	AvailableVRAM   uint64
	ModelSize       uint64
	EstimatedLayers int
	EstimatedVRAM   uint64
	IsFullOffload   bool
}
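
// EstimateGGUFVRAMUsage derives a VRAMEstimate for the given GGUF file from
// gguf-parser-go's llama.cpp run estimate, given the available VRAM in bytes.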
func EstimateGGUFVRAMUsage(f *gguf.GGUFFile, availableVRAM uint64) (*VRAMEstimate, error) {
	// Get model metadata
	m := f.Metadata()

	// Let gguf-parser-go estimate a llama.cpp run for this model.
	estimate := f.EstimateLLaMACppRun()

	lmes := estimate.SummarizeItem(true, 0, 0)
	estimatedVRAM := uint64(0)
	availableLayers := lmes.OffloadLayers // TODO: check if we can just use OffloadLayers here

	// Sum the non-UMA VRAM requirement reported for each device.
	for _, vram := range lmes.VRAMs {
		estimatedVRAM += uint64(vram.NonUMA)
	}

	// Calculate base model size
	modelSize := uint64(m.Size)

	// Guard against division by zero in the per-layer math below.
	if availableLayers == 0 {
		availableLayers = 1
	}
	if estimatedVRAM == 0 {
		estimatedVRAM = 1
	}

	// Estimate how many layers fit in the available VRAM, using the average
	// per-layer VRAM cost derived from the estimate above.
	layerSize := estimatedVRAM / availableLayers
	if layerSize == 0 {
		// Integer division can round to zero when the estimate is smaller
		// than the layer count; avoid dividing by zero below.
		layerSize = 1
	}

	estimatedLayers := int(availableVRAM / layerSize)
	if availableVRAM > estimatedVRAM {
		// The whole model fits: offload every layer.
		estimatedLayers = int(availableLayers)
	}

	return &VRAMEstimate{
		TotalVRAM:       availableVRAM,
		AvailableVRAM:   availableVRAM,
		ModelSize:       modelSize,
		EstimatedLayers: estimatedLayers,
		EstimatedVRAM:   estimatedVRAM,
		IsFullOffload:   availableVRAM > estimatedVRAM,
	}, nil
}
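
// Example usage (a minimal sketch): parse a GGUF file with gguf-parser-go and
// print the estimate. The model path and the 8 GiB VRAM budget are
// hypothetical, and gguf.ParseGGUFFile is assumed to be the parser entry
// point exposed by github.com/gpustack/gguf-parser-go.
//
//	f, err := gguf.ParseGGUFFile("/models/model.gguf")
//	if err != nil {
//		log.Fatal(err)
//	}
//	est, err := xsysinfo.EstimateGGUFVRAMUsage(f, 8<<30) // 8 GiB
//	if err != nil {
//		log.Fatal(err)
//	}
//	fmt.Printf("offloading %d layers (full offload: %v)\n",
//		est.EstimatedLayers, est.IsFullOffload)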