mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-28 06:25:00 +00:00
feat(speculative-sampling): allow specifying a draft model in the model config (#1052)
**Description**

This PR fixes #1013. It adds `draft_model` and `n_draft` to the model YAML config so that models can be loaded with speculative sampling. This should also be compatible with grammars.

Example:

```yaml
backend: llama
context_size: 1024
name: my-model-name
parameters:
  model: foo-bar
n_draft: 16
draft_model: model-name
```

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
247d85b523
commit
8ccf5b2044
12 changed files with 485 additions and 427 deletions
|
@ -63,6 +63,7 @@ message PredictOptions {
|
|||
float RopeFreqScale = 38;
|
||||
float NegativePromptScale = 39;
|
||||
string NegativePrompt = 40;
|
||||
int32 NDraft = 41;
|
||||
}
|
||||
|
||||
// The response message containing the result
|
||||
|
@ -116,7 +117,8 @@ message ModelOptions {
|
|||
string LoraBase = 35;
|
||||
string LoraAdapter = 36;
|
||||
bool NoMulMatQ = 37;
|
||||
|
||||
string DraftModel = 39;
|
||||
|
||||
string AudioPath = 38;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue