chore(autogptq): drop archived backend (#5214)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 02:24:59 +00:00 · 2025-04-19 15:52:29 +02:00 · 2025-04-19 15:52:29 +02:00 · 61cc76c455
commit 61cc76c455
parent 8abecb4a18
23 changed files with 5 additions and 322 deletions
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -184,11 +184,6 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
 		MainGPU:             c.MainGPU,
 		Threads:             int32(*c.Threads),
 		TensorSplit:         c.TensorSplit,
-		// AutoGPTQ
-		ModelBaseName:    c.AutoGPTQ.ModelBaseName,
-		Device:           c.AutoGPTQ.Device,
-		UseTriton:        c.AutoGPTQ.Triton,
-		UseFastTokenizer: c.AutoGPTQ.UseFastTokenizer,
 		// RWKV
 		Tokenizer: c.Tokenizer,
 	}
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -50,9 +50,6 @@ type BackendConfig struct {
 	// LLM configs (GPT4ALL, Llama.cpp, ...)
 	LLMConfig `yaml:",inline"`

-	// AutoGPTQ specifics
-	AutoGPTQ AutoGPTQ `yaml:"autogptq"`
-
 	// Diffusers
 	Diffusers Diffusers `yaml:"diffusers"`
 	Step      int       `yaml:"step"`
@@ -176,14 +173,6 @@ type LimitMMPerPrompt struct {
 	LimitAudioPerPrompt int `yaml:"audio"`
 }

-// AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend
-type AutoGPTQ struct {
-	ModelBaseName    string `yaml:"model_base_name"`
-	Device           string `yaml:"device"`
-	Triton           bool   `yaml:"triton"`
-	UseFastTokenizer bool   `yaml:"use_fast_tokenizer"`
-}
-
 // TemplateConfig is a struct that holds the configuration of the templating system
 type TemplateConfig struct {
 	// Chat is the template used in the chat completion endpoint
--- a/core/http/middleware/request.go
+++ b/core/http/middleware/request.go
@@ -203,18 +203,10 @@ func mergeOpenAIRequestAndBackendConfig(config *config.BackendConfig, input *sch
 		config.Diffusers.ClipSkip = input.ClipSkip
 	}

-	if input.ModelBaseName != "" {
-		config.AutoGPTQ.ModelBaseName = input.ModelBaseName
-	}
-
 	if input.NegativePromptScale != 0 {
 		config.NegativePromptScale = input.NegativePromptScale
 	}

-	if input.UseFastTokenizer {
-		config.UseFastTokenizer = input.UseFastTokenizer
-	}
-
 	if input.NegativePrompt != "" {
 		config.NegativePrompt = input.NegativePrompt
 	}
--- a/core/schema/openai.go
+++ b/core/schema/openai.go
@@ -202,7 +202,6 @@ type OpenAIRequest struct {

 	Backend string `json:"backend" yaml:"backend"`

-	// AutoGPTQ
 	ModelBaseName string `json:"model_base_name" yaml:"model_base_name"`
 }

--- a/core/schema/prediction.go
+++ b/core/schema/prediction.go
@@ -41,8 +41,6 @@ type PredictionOptions struct {
 	RopeFreqBase        float32 `json:"rope_freq_base" yaml:"rope_freq_base"`
 	RopeFreqScale       float32 `json:"rope_freq_scale" yaml:"rope_freq_scale"`
 	NegativePromptScale float32 `json:"negative_prompt_scale" yaml:"negative_prompt_scale"`
-	// AutoGPTQ
-	UseFastTokenizer bool `json:"use_fast_tokenizer" yaml:"use_fast_tokenizer"`

 	// Diffusers
 	ClipSkip int `json:"clip_skip" yaml:"clip_skip"`