mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-20 02:24:59 +00:00
feat(llama.cpp): estimate vram usage (#5299)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
bace6516f1
commit
5c6cd50ed6
7 changed files with 131 additions and 21 deletions
52
pkg/xsysinfo/gguf.go
Normal file
52
pkg/xsysinfo/gguf.go
Normal file
|
@ -0,0 +1,52 @@
|
|||
package xsysinfo
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
gguf "github.com/gpustack/gguf-parser-go"
|
||||
)
|
||||
|
||||
// VRAMEstimate holds the result of estimating how much of a GGUF model can be
// offloaded into GPU memory.
type VRAMEstimate struct {
	TotalVRAM       uint64 // VRAM budget the estimate was computed against, in bytes
	AvailableVRAM   uint64 // VRAM considered available for model layers, in bytes
	ModelSize       uint64 // size of the model as reported by its metadata, in bytes
	EstimatedLayers int    // number of layers estimated to fit in AvailableVRAM
	EstimatedVRAM   uint64 // estimated VRAM consumed by those layers, in bytes
	IsFullOffload   bool   // whether the estimate indicates a full offload
}
|
||||
|
||||
func EstimateGGUFVRAMUsage(f *gguf.GGUFFile, availableVRAM uint64) (*VRAMEstimate, error) {
|
||||
// Get model metadata
|
||||
m := f.Metadata()
|
||||
a := f.Architecture()
|
||||
|
||||
// Calculate base model size
|
||||
modelSize := uint64(m.Size)
|
||||
|
||||
if a.BlockCount == 0 {
|
||||
return nil, errors.New("block count is 0")
|
||||
}
|
||||
|
||||
// Estimate number of layers that can fit in VRAM
|
||||
// Each layer typically requires about 1/32 of the model size
|
||||
layerSize := modelSize / uint64(a.BlockCount)
|
||||
estimatedLayers := int(availableVRAM / layerSize)
|
||||
|
||||
// If we can't fit even one layer, we need to do full offload
|
||||
isFullOffload := estimatedLayers <= 0
|
||||
if isFullOffload {
|
||||
estimatedLayers = 0
|
||||
}
|
||||
|
||||
// Calculate estimated VRAM usage
|
||||
estimatedVRAM := uint64(estimatedLayers) * layerSize
|
||||
|
||||
return &VRAMEstimate{
|
||||
TotalVRAM: availableVRAM,
|
||||
AvailableVRAM: availableVRAM,
|
||||
ModelSize: modelSize,
|
||||
EstimatedLayers: estimatedLayers,
|
||||
EstimatedVRAM: estimatedVRAM,
|
||||
IsFullOffload: isFullOffload,
|
||||
}, nil
|
||||
}
|
|
@ -16,6 +16,22 @@ func GPUs() ([]*gpu.GraphicsCard, error) {
|
|||
return gpu.GraphicsCards, nil
|
||||
}
|
||||
|
||||
func TotalAvailableVRAM() (uint64, error) {
|
||||
gpus, err := GPUs()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
var totalVRAM uint64
|
||||
for _, gpu := range gpus {
|
||||
if gpu.Node.Memory.TotalUsableBytes > 0 {
|
||||
totalVRAM += uint64(gpu.Node.Memory.TotalUsableBytes)
|
||||
}
|
||||
}
|
||||
|
||||
return totalVRAM, nil
|
||||
}
|
||||
|
||||
func HasGPU(vendor string) bool {
|
||||
gpus, err := GPUs()
|
||||
if err != nil {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue