mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-28 06:25:00 +00:00
feat: add initial AutoGPTQ backend implementation
This commit is contained in:
parent
91d49cfe9f
commit
a843e64fc2
37 changed files with 660 additions and 148 deletions
|
@ -26,7 +26,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.
|
|||
model.WithLoadGRPCLLMModelOpts(grpcOpts),
|
||||
model.WithThreads(uint32(c.Threads)),
|
||||
model.WithAssetDir(o.AssetsDestination),
|
||||
model.WithModelFile(modelFile),
|
||||
model.WithModel(modelFile),
|
||||
model.WithContext(o.Context),
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
|
|||
model.WithAssetDir(o.AssetsDestination),
|
||||
model.WithThreads(uint32(c.Threads)),
|
||||
model.WithContext(o.Context),
|
||||
model.WithModelFile(c.ImageGenerationAssets),
|
||||
model.WithModel(c.ImageGenerationAssets),
|
||||
}
|
||||
|
||||
for k, v := range o.ExternalGRPCBackends {
|
||||
|
|
|
@ -27,7 +27,7 @@ func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c
|
|||
model.WithLoadGRPCLLMModelOpts(grpcOpts),
|
||||
model.WithThreads(uint32(c.Threads)), // some models use this to allocate threads during startup
|
||||
model.WithAssetDir(o.AssetsDestination),
|
||||
model.WithModelFile(modelFile),
|
||||
model.WithModel(modelFile),
|
||||
model.WithContext(o.Context),
|
||||
}
|
||||
|
||||
|
|
|
@ -19,6 +19,9 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
|
|||
Seed: int32(c.Seed),
|
||||
NBatch: int32(b),
|
||||
NGQA: c.NGQA,
|
||||
ModelBaseName: c.ModelBaseName,
|
||||
Device: c.Device,
|
||||
UseTriton: c.Triton,
|
||||
RMSNormEps: c.RMSNormEps,
|
||||
F16Memory: c.F16,
|
||||
MLock: c.MMlock,
|
||||
|
|
|
@ -15,7 +15,7 @@ import (
|
|||
func ModelTranscription(audio, language string, loader *model.ModelLoader, c config.Config, o *options.Option) (*api.Result, error) {
|
||||
opts := []model.Option{
|
||||
model.WithBackendString(model.WhisperBackend),
|
||||
model.WithModelFile(c.Model),
|
||||
model.WithModel(c.Model),
|
||||
model.WithContext(o.Context),
|
||||
model.WithThreads(uint32(c.Threads)),
|
||||
model.WithAssetDir(o.AssetsDestination),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue