feat: add initial AutoGPTQ backend implementation

This commit is contained in:
Ettore Di Giacinto 2023-08-07 22:39:10 +02:00
parent 91d49cfe9f
commit a843e64fc2
37 changed files with 660 additions and 148 deletions

View file

@@ -19,6 +19,9 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
Seed: int32(c.Seed),
NBatch: int32(b),
NGQA: c.NGQA,
ModelBaseName: c.ModelBaseName,
Device: c.Device,
UseTriton: c.Triton,
RMSNormEps: c.RMSNormEps,
F16Memory: c.F16,
MLock: c.MMlock,