// LocalAI/apiv2/config.go

package apiv2

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"sync"

	llama "github.com/go-skynet/go-llama.cpp"
	"github.com/mitchellh/mapstructure"
	"gopkg.in/yaml.v2"
)

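// ConfigRegistration identifies a config: which generated endpoint it applies to
// and which model name it serves. It is the lookup key used by ConfigManager.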
type ConfigRegistration struct {
	Endpoint string `yaml:"endpoint" json:"endpoint" mapstructure:"endpoint"`
	Model    string `yaml:"model" json:"model" mapstructure:"model"`
}

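// ConfigLocalSettings holds the on-disk and runtime settings for a model:
// file paths, the backend used to load it, and the thread count for inference.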
type ConfigLocalSettings struct {
	ModelPath    string `yaml:"model" mapstructure:"model"`
	TemplatePath string `yaml:"template" mapstructure:"template"`
	Backend      string `yaml:"backend" mapstructure:"backend"`
	Threads      int    `yaml:"threads" mapstructure:"threads"`
}

type ConfigStub struct {
	Registration  ConfigRegistration  `yaml:"registration" mapstructure:"registration"`
	LocalSettings ConfigLocalSettings `yaml:"local_paths" mapstructure:"local_paths"`
}

type SpecificConfig[RequestModel any] struct {
	ConfigStub      `mapstructure:",squash"`
	RequestDefaults RequestModel `yaml:"request_defaults" mapstructure:"request_defaults"`
}

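// A config file on disk might look like the following. All values are
// illustrative, not taken from a real deployment, and the request_defaults
// keys depend on the generated request type for the endpoint:
//
//	registration:
//	  endpoint: createChatCompletion
//	  model: gpt-3.5-turbo
//	local_paths:
//	  model: ggml-model-q4_0.bin
//	  template: chat-template.tmpl
//	  backend: llama
//	  threads: 4
//	request_defaults:
//	  temperature: 0.7
//	  max_tokens: 256
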
type Config interface {
	GetRequestDefaults() interface{}
	GetLocalSettings() ConfigLocalSettings
	GetRegistration() ConfigRegistration
}

func (cs ConfigStub) GetRequestDefaults() interface{} {
	return nil
}

func (cs ConfigStub) GetLocalSettings() ConfigLocalSettings {
	return cs.LocalSettings
}

func (cs ConfigStub) GetRegistration() ConfigRegistration {
	return cs.Registration
}

func (sc SpecificConfig[RequestModel]) GetRequestDefaults() interface{} {
	return sc.RequestDefaults
}

func (sc SpecificConfig[RequestModel]) GetRequest() RequestModel {
	return sc.RequestDefaults
}

func (sc SpecificConfig[RequestModel]) GetLocalSettings() ConfigLocalSettings {
	return sc.LocalSettings
}

func (sc SpecificConfig[RequestModel]) GetRegistration() ConfigRegistration {
	return sc.Registration
}

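// ConfigManager indexes loaded Configs by their registration (endpoint + model).
// The embedded mutex guards the configs map against concurrent use.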
type ConfigManager struct {
	configs map[ConfigRegistration]Config
	sync.Mutex
}

func NewConfigManager() *ConfigManager {
	return &ConfigManager{
		configs: make(map[ConfigRegistration]Config),
	}
}

// Private helper method that doesn't enforce the mutex. This is because loading at
// the directory level keeps the lock held the whole time, and I like that.
func (cm *ConfigManager) loadConfigFile(path string) (*Config, error) {
	fmt.Printf("INTERNAL loadConfigFile for %s\n", path)
	stub := ConfigStub{}
	f, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("cannot read config file: %w", err)
	}
	if err := yaml.Unmarshal(f, &stub); err != nil {
		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
	}
	fmt.Printf("RAW STUB: %+v\n", stub)
	endpoint := stub.Registration.Endpoint
	// EndpointConfigMap is generated over in localai.gen.go.
	// It's a map that translates a string endpoint function name to an empty
	// SpecificConfig[T], with the type parameter for that request.
	if structType, ok := EndpointConfigMap[endpoint]; ok {
		fmt.Printf("~~ EndpointConfigMap[%s]: %+v\n", endpoint, structType)
		tmpUnmarshal := map[string]interface{}{}
		if err := yaml.Unmarshal(f, &tmpUnmarshal); err != nil {
			if e, ok := err.(*yaml.TypeError); ok {
				fmt.Println("\n!!!!!Type error:", e)
			}
			return nil, fmt.Errorf("cannot unmarshal config file for %s: %w", endpoint, err)
		}
		fmt.Printf("$$$ tmpUnmarshal: %+v\n", tmpUnmarshal)
		if err := mapstructure.Decode(tmpUnmarshal, &structType); err != nil {
			return nil, fmt.Errorf("cannot decode config file for %s: %w", endpoint, err)
		}
		fmt.Printf("AFTER UNMARSHAL %T\n%+v\n=======\n", structType, structType)
		cm.configs[structType.GetRegistration()] = structType
		return &structType, nil
	}
	return nil, fmt.Errorf("unrecognized endpoint %s in config file %s", endpoint, path)
}

func (cm *ConfigManager) LoadConfigFile(path string) (*Config, error) {
	fmt.Printf("LoadConfigFile TOP for %s\n", path)
	cm.Lock()
	fmt.Println("cm.Lock done")
	defer cm.Unlock()
	return cm.loadConfigFile(path)
}

func (cm *ConfigManager) LoadConfigDirectory(path string) ([]ConfigRegistration, error) {
	fmt.Printf("LoadConfigDirectory TOP for %s\n", path)
	cm.Lock()
	defer cm.Unlock()
	files, err := os.ReadDir(path)
	if err != nil {
		return []ConfigRegistration{}, err
	}
	fmt.Printf("os.ReadDir done, found %d files\n", len(files))
	for _, file := range files {
		// Skip anything that isn't yaml
		if !strings.HasSuffix(file.Name(), ".yaml") {
			continue
		}
		_, err := cm.loadConfigFile(filepath.Join(path, file.Name()))
		if err != nil {
			return []ConfigRegistration{}, err
		}
	}
	fmt.Printf("LoadConfigDirectory DONE %d\n", len(cm.configs))
	return cm.listConfigs(), nil
}

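// Example usage (directory path hypothetical):
//
//	cm := NewConfigManager()
//	registrations, err := cm.LoadConfigDirectory("./configuration")
//	if err != nil {
//		log.Fatal(err)
//	}
//	for _, reg := range registrations {
//		fmt.Printf("loaded %s for endpoint %s\n", reg.Model, reg.Endpoint)
//	}
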
func (cm *ConfigManager) GetConfig(r ConfigRegistration) (Config, bool) {
	cm.Lock()
	defer cm.Unlock()
	v, exists := cm.configs[r]
	return v, exists
}

// This is a convenience function for endpoint handlers to use.
// The advantage is that it avoids typos in the endpoint string.
// Not a clue what the performance cost of this is.
func (cm *ConfigManager) GetConfigForThisEndpoint(m string) (Config, bool) {
	endpoint := printCurrentFunctionName(2)
	return cm.GetConfig(ConfigRegistration{
		Model:    m,
		Endpoint: endpoint,
	})
}

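// For example (handler name hypothetical): when called from a generated handler
// named createChatCompletion, printCurrentFunctionName(2) resolves to that
// caller's name, so the lookup key becomes
// ConfigRegistration{Endpoint: "createChatCompletion", Model: m}.
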
func (cm *ConfigManager) listConfigs() []ConfigRegistration {
	var res []ConfigRegistration
	for k := range cm.configs {
		res = append(res, k)
	}
	return res
}

func (cm *ConfigManager) ListConfigs() []ConfigRegistration {
	cm.Lock()
	defer cm.Unlock()
	return cm.listConfigs()
}

// These functions I'm a bit dubious about. I think there's a better refactoring down in pkg/model.
// But to get a minimal test up and running, here we go!
// TODO: non text completion
func (sc SpecificConfig[RequestModel]) ToModelOptions() []llama.ModelOption {
	llamaOpts := []llama.ModelOption{}
	switch req := sc.GetRequestDefaults().(type) {
	case CreateCompletionRequest:
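		// NOTE: a Go type switch has no implicit fallthrough, so this empty case
		// body means CreateCompletionRequest currently contributes no model options.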
	case CreateChatCompletionRequest:
		// Guard against a nil extensions block before dereferencing, as ToPredictOptions does.
		if req.XLocalaiExtensions != nil && req.XLocalaiExtensions.F16 != nil && *req.XLocalaiExtensions.F16 {
			llamaOpts = append(llamaOpts, llama.EnableF16Memory)
		}
		if req.MaxTokens != nil && *req.MaxTokens > 0 {
			llamaOpts = append(llamaOpts, llama.SetContext(*req.MaxTokens)) // todo is this right?
		}
		// TODO DO MORE!
	}
	// Code to Port:
	// if c.Embeddings {
	//	llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
	// }
	// if c.NGPULayers != 0 {
	//	llamaOpts = append(llamaOpts, llama.SetGPULayers(c.NGPULayers))
	// }
	return llamaOpts
}

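// A minimal sketch of how these options might be consumed (assuming the
// go-llama.cpp constructor llama.New; error handling elided):
//
//	settings := sc.GetLocalSettings()
//	model, _ := llama.New(settings.ModelPath, sc.ToModelOptions()...)
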
func (sc SpecificConfig[RequestModel]) ToPredictOptions() []llama.PredictOption {
	llamaOpts := []llama.PredictOption{
		llama.SetThreads(sc.GetLocalSettings().Threads),
	}
	switch req := sc.GetRequestDefaults().(type) {
	// TODO Refactor this when we get to p2 and add image / audio.
	// I expect that it'll be worth pulling out the base case first, and doing fancy fallthrough things.
	// Text Requests:
	case CreateCompletionRequest:
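		// NOTE: as in ToModelOptions, the empty case means plain completion
		// requests currently get only the default thread count set above.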
	case CreateChatCompletionRequest:
		if req.Temperature != nil {
			llamaOpts = append(llamaOpts, llama.SetTemperature(float64(*req.Temperature))) // Oh boy. TODO Investigate. This is why I'm doing this.
		}
		if req.TopP != nil {
			llamaOpts = append(llamaOpts, llama.SetTopP(float64(*req.TopP))) // CAST
		}
		if req.MaxTokens != nil {
			llamaOpts = append(llamaOpts, llama.SetTokens(*req.MaxTokens))
		}
		if req.FrequencyPenalty != nil {
			llamaOpts = append(llamaOpts, llama.SetPenalty(float64(*req.FrequencyPenalty))) // CAST
		}
		if stop0, err := req.Stop.AsCreateChatCompletionRequestStop0(); err == nil && stop0 != "" {
			llamaOpts = append(llamaOpts, llama.SetStopWords(stop0))
		}
		if stop1, err := req.Stop.AsCreateChatCompletionRequestStop1(); err == nil && len(stop1) > 0 {
			llamaOpts = append(llamaOpts, llama.SetStopWords(stop1...))
		}
		if req.XLocalaiExtensions != nil {
			if req.XLocalaiExtensions.TopK != nil {
				llamaOpts = append(llamaOpts, llama.SetTopK(*req.XLocalaiExtensions.TopK))
			}
			if req.XLocalaiExtensions.F16 != nil && *req.XLocalaiExtensions.F16 {
				llamaOpts = append(llamaOpts, llama.EnableF16KV)
			}
			if req.XLocalaiExtensions.Seed != nil {
				llamaOpts = append(llamaOpts, llama.SetSeed(*req.XLocalaiExtensions.Seed))
			}
			if req.XLocalaiExtensions.IgnoreEos != nil && *req.XLocalaiExtensions.IgnoreEos {
				llamaOpts = append(llamaOpts, llama.IgnoreEOS)
			}
			if req.XLocalaiExtensions.Debug != nil && *req.XLocalaiExtensions.Debug {
				llamaOpts = append(llamaOpts, llama.Debug)
			}
			if req.XLocalaiExtensions.Mirostat != nil {
				llamaOpts = append(llamaOpts, llama.SetMirostat(*req.XLocalaiExtensions.Mirostat))
			}
			if req.XLocalaiExtensions.MirostatEta != nil {
				llamaOpts = append(llamaOpts, llama.SetMirostatETA(*req.XLocalaiExtensions.MirostatEta))
			}
			if req.XLocalaiExtensions.MirostatTau != nil {
				llamaOpts = append(llamaOpts, llama.SetMirostatTAU(*req.XLocalaiExtensions.MirostatTau))
			}
			if req.XLocalaiExtensions.Keep != nil {
				llamaOpts = append(llamaOpts, llama.SetNKeep(*req.XLocalaiExtensions.Keep))
			}
			if req.XLocalaiExtensions.Batch != nil && *req.XLocalaiExtensions.Batch != 0 {
				llamaOpts = append(llamaOpts, llama.SetBatch(*req.XLocalaiExtensions.Batch))
			}
		}
	}
	// CODE TO PORT
	// SKIPPING PROMPT CACHE FOR PASS ONE, TODO READ ABOUT IT
	// if c.PromptCacheAll {
	//	predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
	// }
	// if c.PromptCachePath != "" {
	//	// Create parent directory
	//	p := filepath.Join(modelPath, c.PromptCachePath)
	//	os.MkdirAll(filepath.Dir(p), 0755)
	//	predictOptions = append(predictOptions, llama.SetPathPromptCache(p))
	// }
	return llamaOpts
}
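
// A sketch of tying both option sets together (Predict signature assumed from
// go-llama.cpp; prompt construction elided):
//
//	model, err := llama.New(sc.GetLocalSettings().ModelPath, sc.ToModelOptions()...)
//	if err != nil {
//		// handle load failure
//	}
//	text, err := model.Predict(prompt, sc.ToPredictOptions()...)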