chore(autogptq): drop archived backend (#5214)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 18:45:00 +00:00 · 2025-04-19 15:52:29 +02:00 · 2025-04-19 15:52:29 +02:00 · 61cc76c455
commit 61cc76c455
parent 8abecb4a18
23 changed files with 5 additions and 322 deletions
--- a/docs/content/docs/advanced/advanced-usage.md
+++ b/docs/content/docs/advanced/advanced-usage.md
@@ -268,14 +268,6 @@ yarn_ext_factor: 0
 yarn_attn_factor: 0
 yarn_beta_fast: 0
 yarn_beta_slow: 0
-
-# AutoGPT-Q settings, for configurations specific to GPT models.
-autogptq:
-    model_base_name: "" # Base name of the model.
-    device: "" # Device to run the model on.
-    triton: false # Whether to use Triton Inference Server.
-    use_fast_tokenizer: false # Whether to use a fast tokenizer for quicker processing.
-
 # configuration for diffusers model
 diffusers:
    cuda: false # Whether to use CUDA