mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-25 13:04:59 +00:00
feat: Add UseFastTokenizer
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
39805b09e5
commit
3c8fc37c56
10 changed files with 198 additions and 169 deletions
|
@ -15,26 +15,27 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
|
|||
b = c.Batch
|
||||
}
|
||||
return &pb.ModelOptions{
|
||||
ContextSize: int32(c.ContextSize),
|
||||
Seed: int32(c.Seed),
|
||||
NBatch: int32(b),
|
||||
NGQA: c.NGQA,
|
||||
ModelBaseName: c.ModelBaseName,
|
||||
Device: c.Device,
|
||||
UseTriton: c.Triton,
|
||||
RMSNormEps: c.RMSNormEps,
|
||||
F16Memory: c.F16,
|
||||
MLock: c.MMlock,
|
||||
RopeFreqBase: c.RopeFreqBase,
|
||||
RopeFreqScale: c.RopeFreqScale,
|
||||
NUMA: c.NUMA,
|
||||
Embeddings: c.Embeddings,
|
||||
LowVRAM: c.LowVRAM,
|
||||
NGPULayers: int32(c.NGPULayers),
|
||||
MMap: c.MMap,
|
||||
MainGPU: c.MainGPU,
|
||||
Threads: int32(c.Threads),
|
||||
TensorSplit: c.TensorSplit,
|
||||
ContextSize: int32(c.ContextSize),
|
||||
Seed: int32(c.Seed),
|
||||
NBatch: int32(b),
|
||||
NGQA: c.NGQA,
|
||||
ModelBaseName: c.ModelBaseName,
|
||||
UseFastTokenizer: c.UseFastTokenizer,
|
||||
Device: c.Device,
|
||||
UseTriton: c.Triton,
|
||||
RMSNormEps: c.RMSNormEps,
|
||||
F16Memory: c.F16,
|
||||
MLock: c.MMlock,
|
||||
RopeFreqBase: c.RopeFreqBase,
|
||||
RopeFreqScale: c.RopeFreqScale,
|
||||
NUMA: c.NUMA,
|
||||
Embeddings: c.Embeddings,
|
||||
LowVRAM: c.LowVRAM,
|
||||
NGPULayers: int32(c.NGPULayers),
|
||||
MMap: c.MMap,
|
||||
MainGPU: c.MainGPU,
|
||||
Threads: int32(c.Threads),
|
||||
TensorSplit: c.TensorSplit,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -56,9 +56,10 @@ type Config struct {
|
|||
NGQA int32 `yaml:"ngqa"`
|
||||
|
||||
// AutoGPTQ
|
||||
ModelBaseName string `yaml:"model_base_name"`
|
||||
Device string `yaml:"device"`
|
||||
Triton bool `yaml:"triton"`
|
||||
ModelBaseName string `yaml:"model_base_name"`
|
||||
Device string `yaml:"device"`
|
||||
Triton bool `yaml:"triton"`
|
||||
UseFastTokenizer bool `yaml:"use_fast_tokenizer"`
|
||||
}
|
||||
|
||||
type Functions struct {
|
||||
|
|
|
@ -39,4 +39,6 @@ type PredictionOptions struct {
|
|||
RopeFreqBase float32 `json:"rope_freq_base" yaml:"rope_freq_base"`
|
||||
RopeFreqScale float32 `json:"rope_freq_scale" yaml:"rope_freq_scale"`
|
||||
NegativePromptScale float32 `json:"negative_prompt_scale" yaml:"negative_prompt_scale"`
|
||||
// AutoGPTQ
|
||||
UseFastTokenizer bool `json:"use_fast_tokenizer" yaml:"use_fast_tokenizer"`
|
||||
}
|
||||
|
|
|
@ -83,6 +83,10 @@ func updateConfig(config *config.Config, input *OpenAIRequest) {
|
|||
config.NegativePromptScale = input.NegativePromptScale
|
||||
}
|
||||
|
||||
if input.UseFastTokenizer {
|
||||
config.UseFastTokenizer = input.UseFastTokenizer
|
||||
}
|
||||
|
||||
if input.NegativePrompt != "" {
|
||||
config.NegativePrompt = input.NegativePrompt
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue