transformers: correctly load automodels (#1643)

* backends(transformers): use AutoModel with LLM types

* examples: animagine-xl

* Add codellama examples
This commit is contained in:
Ettore Di Giacinto 2024-01-26 00:13:21 +01:00 committed by GitHub
parent 3733250b3c
commit cb7512734d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
27 changed files with 1144 additions and 569 deletions

View file

@@ -63,6 +63,8 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
F16Memory: c.F16,
MLock: c.MMlock,
RopeFreqBase: c.RopeFreqBase,
RopeScaling: c.RopeScaling,
Type: c.ModelType,
RopeFreqScale: c.RopeFreqScale,
NUMA: c.NUMA,
Embeddings: c.Embeddings,

View file

@@ -128,7 +128,9 @@ type LLMConfig struct {
Quantization string `yaml:"quantization"`
MMProj string `yaml:"mmproj"`
RopeScaling string `yaml:"rope_scaling"`
RopeScaling string `yaml:"rope_scaling"`
ModelType string `yaml:"type"`
YarnExtFactor float32 `yaml:"yarn_ext_factor"`
YarnAttnFactor float32 `yaml:"yarn_attn_factor"`
YarnBetaFast float32 `yaml:"yarn_beta_fast"`