feat: add initial AutoGPTQ backend implementation

2025-05-28 06:25:00 +00:00 · 2023-08-07 22:39:10 +02:00 · 2023-08-07 22:39:10 +02:00 · a843e64fc2
commit a843e64fc2
parent 91d49cfe9f
37 changed files with 660 additions and 148 deletions
--- a/api/backend/embeddings.go
+++ b/api/backend/embeddings.go
@@ -26,7 +26,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.
 		model.WithLoadGRPCLLMModelOpts(grpcOpts),
 		model.WithThreads(uint32(c.Threads)),
 		model.WithAssetDir(o.AssetsDestination),
-		model.WithModelFile(modelFile),
+		model.WithModel(modelFile),
 		model.WithContext(o.Context),
 	}

--- a/api/backend/image.go
+++ b/api/backend/image.go
@@ -20,7 +20,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
 		model.WithAssetDir(o.AssetsDestination),
 		model.WithThreads(uint32(c.Threads)),
 		model.WithContext(o.Context),
-		model.WithModelFile(c.ImageGenerationAssets),
+		model.WithModel(c.ImageGenerationAssets),
 	}

 	for k, v := range o.ExternalGRPCBackends {
--- a/api/backend/llm.go
+++ b/api/backend/llm.go
@@ -27,7 +27,7 @@ func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c
 		model.WithLoadGRPCLLMModelOpts(grpcOpts),
 		model.WithThreads(uint32(c.Threads)), // some models uses this to allocate threads during startup
 		model.WithAssetDir(o.AssetsDestination),
-		model.WithModelFile(modelFile),
+		model.WithModel(modelFile),
 		model.WithContext(o.Context),
 	}

--- a/api/backend/options.go
+++ b/api/backend/options.go
@@ -19,6 +19,9 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
 		Seed:          int32(c.Seed),
 		NBatch:        int32(b),
 		NGQA:          c.NGQA,
+		ModelBaseName: c.ModelBaseName,
+		Device:        c.Device,
+		UseTriton:     c.Triton,
 		RMSNormEps:    c.RMSNormEps,
 		F16Memory:     c.F16,
 		MLock:         c.MMlock,
--- a/api/backend/transcript.go
+++ b/api/backend/transcript.go
@@ -15,7 +15,7 @@ import (
 func ModelTranscription(audio, language string, loader *model.ModelLoader, c config.Config, o *options.Option) (*api.Result, error) {
 	opts := []model.Option{
 		model.WithBackendString(model.WhisperBackend),
-		model.WithModelFile(c.Model),
+		model.WithModel(c.Model),
 		model.WithContext(o.Context),
 		model.WithThreads(uint32(c.Threads)),
 		model.WithAssetDir(o.AssetsDestination),