Add support for cerebras (#45)

Signed-off-by: mudler <mudler@c3os.io>
2025-05-22 19:44:59 +00:00 · 2023-04-20 19:33:36 +02:00 · 2023-04-20 19:33:36 +02:00 · 1c4fbaae20
commit 1c4fbaae20
parent d517a54e28
6 changed files with 124 additions and 20 deletions
--- a/pkg/model/loader.go
+++ b/pkg/model/loader.go
@ -12,20 +12,24 @@ import (

 	"github.com/rs/zerolog/log"

+	gpt2 "github.com/go-skynet/go-gpt2.cpp"
 	gptj "github.com/go-skynet/go-gpt4all-j.cpp"
 	llama "github.com/go-skynet/go-llama.cpp"
 )

 type ModelLoader struct {
-	modelPath        string
-	mu               sync.Mutex
-	models           map[string]*llama.LLama
-	gptmodels        map[string]*gptj.GPTJ
+	modelPath string
+	mu        sync.Mutex
+
+	models     map[string]*llama.LLama
+	gptmodels  map[string]*gptj.GPTJ
+	gpt2models map[string]*gpt2.GPT2
+
 	promptsTemplates map[string]*template.Template
 }

 func NewModelLoader(modelPath string) *ModelLoader {
-	return &ModelLoader{modelPath: modelPath, gptmodels: make(map[string]*gptj.GPTJ), models: make(map[string]*llama.LLama), promptsTemplates: make(map[string]*template.Template)}
+	return &ModelLoader{modelPath: modelPath, gpt2models: make(map[string]*gpt2.GPT2), gptmodels: make(map[string]*gptj.GPTJ), models: make(map[string]*llama.LLama), promptsTemplates: make(map[string]*template.Template)}
 }

 func (ml *ModelLoader) ExistsInModelPath(s string) bool {
@ -98,6 +102,38 @@ func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
 	return nil
 }

+func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
+	ml.mu.Lock()
+	defer ml.mu.Unlock()
+
+	// Check if we already have a loaded model
+	if !ml.ExistsInModelPath(modelName) {
+		return nil, fmt.Errorf("model does not exist")
+	}
+
+	if m, ok := ml.gpt2models[modelName]; ok {
+		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
+		return m, nil
+	}
+
+	// Load the model and keep it in memory for later use
+	modelFile := filepath.Join(ml.modelPath, modelName)
+	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
+
+	model, err := gpt2.New(modelFile)
+	if err != nil {
+		return nil, err
+	}
+
+	// If there is a prompt template, load it
+	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
+		return nil, err
+	}
+
+	ml.gpt2models[modelName] = model
+	return model, err
+}
+
 func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
 	ml.mu.Lock()
 	defer ml.mu.Unlock()
@ -112,6 +148,13 @@ func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
 		return m, nil
 	}

+	// TODO: This needs refactoring, it's really bad to have it in here
+	// Check if we have a GPT2 model loaded instead - if we do we return an error so the API tries with GPT2
+	if _, ok := ml.gpt2models[modelName]; ok {
+		log.Debug().Msgf("Model is GPT2: %s", modelName)
+		return nil, fmt.Errorf("this model is a GPT2 one")
+	}
+
 	// Load the model and keep it in memory for later use
 	modelFile := filepath.Join(ml.modelPath, modelName)
 	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
@ -152,6 +195,10 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
 		log.Debug().Msgf("Model is GPTJ: %s", modelName)
 		return nil, fmt.Errorf("this model is a GPTJ one")
 	}
+	if _, ok := ml.gpt2models[modelName]; ok {
+		log.Debug().Msgf("Model is GPT2: %s", modelName)
+		return nil, fmt.Errorf("this model is a GPT2 one")
+	}

 	// Load the model and keep it in memory for later use
 	modelFile := filepath.Join(ml.modelPath, modelName)