feat: add initial AutoGPTQ backend implementation

This commit is contained in:
Ettore Di Giacinto 2023-08-07 22:39:10 +02:00
parent 91d49cfe9f
commit a843e64fc2
37 changed files with 660 additions and 148 deletions

View file

@@ -89,6 +89,12 @@ message ModelOptions {
float RopeFreqScale = 18;
float RMSNormEps = 19;
int32 NGQA = 20;
string ModelFile = 21;
// AutoGPTQ backend options.
// NOTE(review): the per-field notes below are inferred from the field names
// only; confirm them against the AutoGPTQ backend implementation.
// Presumably the device the model is loaded on — TODO confirm accepted values.
string Device = 22;
// Presumably enables the Triton kernels when true — TODO confirm.
bool UseTriton = 23;
// Presumably the base name of the quantized model file — TODO confirm.
string ModelBaseName = 24;
}
message Result {