Mirror of https://github.com/mudler/LocalAI.git (synced 2025-05-20 02:24:59 +00:00)
fix(config): set better defaults for inferencing (#1822)
* fix(defaults): set better defaults for inferencing

  This changeset aims to set better defaults and to properly detect when no inference settings are provided with the model. If none are specified, we default to mirostat sampling and offload all GPU layers (if a GPU is detected).

  Related to https://github.com/mudler/LocalAI/issues/1373 and https://github.com/mudler/LocalAI/issues/1723

* Adapt tests

* Also pre-initialize default seed
Parent: bc8f648a91
Commit: f895d06605
12 changed files with 235 additions and 133 deletions
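The gist of the change: when a model's configuration does not specify inference settings, LocalAI falls back to sensible defaults (mirostat sampling, and offloading all GPU layers when a GPU is detected). Below is a minimal Go sketch of that kind of nil-checked defaulting; the type and field names are hypothetical, not the actual LocalAI structs.

package main

import "fmt"

type LLMConfig struct {
	Mirostat   *int // nil means "not specified by the model config"
	NGPULayers *int // nil means "not specified by the model config"
}

// setDefaults fills in values only where nothing was configured.
func setDefaults(cfg *LLMConfig, gpuDetected bool) {
	if cfg.Mirostat == nil {
		defaultMirostat := 2 // default to mirostat v2 sampling
		cfg.Mirostat = &defaultMirostat
	}
	if cfg.NGPULayers == nil && gpuDetected {
		allLayers := 99999999 // "offload everything" sentinel
		cfg.NGPULayers = &allLayers
	}
}

func main() {
	cfg := LLMConfig{}
	setDefaults(&cfg, true)
	fmt.Println(*cfg.Mirostat, *cfg.NGPULayers)
}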
@@ -74,10 +74,10 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest
 	if input.Echo {
 		config.Echo = input.Echo
 	}
-	if input.TopK != 0 {
+	if input.TopK != nil {
 		config.TopK = input.TopK
 	}
-	if input.TopP != 0 {
+	if input.TopP != nil {
 		config.TopP = input.TopP
 	}
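The pattern repeated throughout this hunk is "override the config only when the request actually provided a value": the request fields move from value types to pointers, so != nil replaces != 0. A small sketch of the idea, using assumed types rather than the real schema.OpenAIRequest and BackendConfig:

package main

import "fmt"

type request struct {
	TopK *int // nil when the client did not send top_k at all
}

type backendConfig struct {
	TopK *int // may already hold a model-level default
}

func updateConfig(cfg *backendConfig, in *request) {
	// A non-nil pointer means the client explicitly set the value,
	// even if that value happens to be 0.
	if in.TopK != nil {
		cfg.TopK = in.TopK
	}
}

func main() {
	def := 40
	cfg := backendConfig{TopK: &def}
	updateConfig(&cfg, &request{}) // request omitted top_k: default preserved
	fmt.Println(*cfg.TopK)         // prints 40
}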
@@ -117,11 +117,11 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest
 		config.Grammar = input.Grammar
 	}

-	if input.Temperature != 0 {
+	if input.Temperature != nil {
 		config.Temperature = input.Temperature
 	}

-	if input.Maxtokens != 0 {
+	if input.Maxtokens != nil {
 		config.Maxtokens = input.Maxtokens
 	}
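The pointer fields work because encoding/json leaves an omitted key as nil, while an explicit 0 produces a non-nil pointer to 0; a request asking for temperature 0 is therefore no longer indistinguishable from one that says nothing. A quick illustration with made-up field names:

package main

import (
	"encoding/json"
	"fmt"
)

type req struct {
	Temperature *float64 `json:"temperature"`
	MaxTokens   *int     `json:"max_tokens"`
}

func main() {
	var omitted, explicit req
	_ = json.Unmarshal([]byte(`{}`), &omitted)
	_ = json.Unmarshal([]byte(`{"temperature": 0, "max_tokens": 0}`), &explicit)

	fmt.Println(omitted.Temperature == nil)                 // true: key absent, nothing to override
	fmt.Println(*explicit.Temperature, *explicit.MaxTokens) // 0 0: the client really asked for zero
}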
@@ -193,30 +193,14 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest
 		config.Batch = input.Batch
 	}

 	if input.F16 {
 		config.F16 = input.F16
 	}

 	if input.IgnoreEOS {
 		config.IgnoreEOS = input.IgnoreEOS
 	}

-	if input.Seed != 0 {
+	if input.Seed != nil {
 		config.Seed = input.Seed
 	}

-	if input.Mirostat != 0 {
-		config.LLMConfig.Mirostat = input.Mirostat
-	}
-
-	if input.MirostatETA != 0 {
-		config.LLMConfig.MirostatETA = input.MirostatETA
-	}
-
-	if input.MirostatTAU != 0 {
-		config.LLMConfig.MirostatTAU = input.MirostatTAU
-	}
-
 	if input.TypicalP != 0 {
 		config.TypicalP = input.TypicalP
 	}
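Besides moving the mirostat overrides out of the per-request path, the commit message notes that the default seed is pre-initialized. One plausible way to do that when neither the request nor the model config supplies a seed; the helper name and policy below are assumptions, not the actual implementation:

package main

import (
	"fmt"
	"math/rand"
)

// effectiveSeed returns the configured seed when one was provided,
// otherwise it picks a random seed up front so the same value can be
// logged and reused for the whole request.
func effectiveSeed(configured *int) int {
	if configured != nil {
		return *configured // respect an explicit seed, including 0
	}
	return rand.Int()
}

func main() {
	explicit := 42
	fmt.Println(effectiveSeed(&explicit)) // 42
	fmt.Println(effectiveSeed(nil))       // a randomly chosen seed
}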
@@ -272,7 +256,12 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest
 }

 func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) {
-	cfg, err := config.LoadBackendConfigFileByName(modelFile, loader.ModelPath, cm, debug, threads, ctx, f16)
+	cfg, err := cm.LoadBackendConfigFileByName(modelFile, loader.ModelPath,
+		config.LoadOptionDebug(debug),
+		config.LoadOptionThreads(threads),
+		config.LoadOptionContextSize(ctx),
+		config.LoadOptionF16(f16),
+	)

 	// Set the parameters for the language model prediction
 	updateRequestConfig(cfg, input)
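The new call site replaces a fixed positional parameter list with functional options (config.LoadOptionDebug, config.LoadOptionThreads, and so on), so the loader can grow new settings without breaking existing callers. A generic sketch of that pattern with invented option names, not the LocalAI API itself:

package main

import "fmt"

type loadOptions struct {
	debug       bool
	threads     int
	contextSize int
	f16         bool
}

// LoadOption mutates the option struct; callers pass only what they need.
type LoadOption func(*loadOptions)

func WithDebug(v bool) LoadOption      { return func(o *loadOptions) { o.debug = v } }
func WithThreads(n int) LoadOption     { return func(o *loadOptions) { o.threads = n } }
func WithContextSize(n int) LoadOption { return func(o *loadOptions) { o.contextSize = n } }
func WithF16(v bool) LoadOption        { return func(o *loadOptions) { o.f16 = v } }

func loadConfig(name string, opts ...LoadOption) loadOptions {
	o := loadOptions{threads: 4, contextSize: 512} // defaults live in one place
	for _, opt := range opts {
		opt(&o)
	}
	fmt.Printf("loading %s with %+v\n", name, o)
	return o
}

func main() {
	// Adding a new option later does not change this call's signature.
	loadConfig("model.yaml", WithDebug(true), WithThreads(8), WithF16(true))
}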