chore: drop embedded models (#4715)
Since the remote gallery was introduced, this is now completely superseded by it. In order to keep the code clean and remove redundant parts, let's simplify the usage.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
parent 1656e1a88e
commit 72e52c4f6a
39 changed files with 8 additions and 986 deletions
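The code-level change that ripples through the hunks below is the signature of `startup.InstallModels`, which loses its model-library URL parameter now that the embedded models and the remote-library shorteners are gone. A minimal sketch of the new call shape, assuming the import paths `github.com/mudler/LocalAI/pkg/startup` and `github.com/mudler/LocalAI/core/config` (package locations inferred, not shown in the diff) and a hypothetical wrapper name:

```go
package example

import (
	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/pkg/startup"
	"github.com/rs/zerolog/log"
)

// installStartupModels is a hypothetical helper illustrating the updated call:
// the old second argument (the remote model-library URL) is simply dropped,
// while every other argument keeps its position and meaning.
func installStartupModels(galleries []config.Gallery, modelPath string, enforceScan bool, modelURLs ...string) {
	// Old: startup.InstallModels(galleries, modelLibraryURL, modelPath, enforceScan, nil, modelURLs...)
	if err := startup.InstallModels(galleries, modelPath, enforceScan, nil, modelURLs...); err != nil {
		log.Error().Err(err).Msg("error installing models")
	}
}
```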
Makefile (2 changes)
@@ -861,7 +861,7 @@ swagger:

 .PHONY: gen-assets
 gen-assets:
-	$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
+	$(GOCMD) run core/dependencies_manager/manager.go webui_static.yaml core/http/static/assets

 ## Documentation
 docs/layouts/_default:
@@ -62,7 +62,7 @@ func New(opts ...config.AppOption) (*Application, error) {
 		}
 	}

-	if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
+	if err := pkgStartup.InstallModels(options.Galleries, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
 		log.Error().Err(err).Msg("error installing models")
 	}

@@ -100,7 +100,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
 		log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
 	}

-	err = startup.InstallModels(galleries, "", mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
+	err = startup.InstallModels(galleries, mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
 	if err != nil {
 		return err
 	}
@@ -32,7 +32,6 @@ type RunCMD struct {

 	Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
 	AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"`
-	RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"`
 	PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
 	Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"`
 	PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"`
@@ -90,7 +89,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 		config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval),
 		config.WithF16(r.F16),
 		config.WithStringGalleries(r.Galleries),
-		config.WithModelLibraryURL(r.RemoteLibrary),
 		config.WithCors(r.CORS),
 		config.WithCorsAllowOrigins(r.CORSAllowOrigins),
 		config.WithCsrf(r.CSRF),
@@ -44,8 +44,6 @@ type ApplicationConfig struct {
 	DisableGalleryEndpoint bool
 	LoadToMemory []string

-	ModelLibraryURL string
-
 	Galleries []Gallery

 	BackendAssets embed.FS
@@ -126,12 +124,6 @@ func WithP2PToken(s string) AppOption {
 	}
 }

-func WithModelLibraryURL(url string) AppOption {
-	return func(o *ApplicationConfig) {
-		o.ModelLibraryURL = url
-	}
-}
-
 func WithLibPath(path string) AppOption {
 	return func(o *ApplicationConfig) {
 		o.LibPath = path
@@ -129,7 +129,7 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader
 		if op.GalleryModelName != "" {
 			err = gallery.InstallModelFromGallery(op.Galleries, op.GalleryModelName, g.appConfig.ModelPath, op.Req, progressCallback, g.appConfig.EnforcePredownloadScans)
 		} else if op.ConfigURL != "" {
-			err = startup.InstallModels(op.Galleries, op.ConfigURL, g.appConfig.ModelPath, g.appConfig.EnforcePredownloadScans, progressCallback, op.ConfigURL)
+			err = startup.InstallModels(op.Galleries, g.appConfig.ModelPath, g.appConfig.EnforcePredownloadScans, progressCallback, op.ConfigURL)
 			if err != nil {
 				updateError(err)
 				continue
@@ -1,126 +0,0 @@
-+++
-disableToc = false
-title = "Run other Models"
-weight = 23
-icon = "rocket_launch"
-
-+++
-
-## Running other models
-
-> _Do you have already a model file? Skip to [Run models manually]({{%relref "docs/getting-started/models" %}})_.
-
-To load models into LocalAI, you can either [use models manually]({{%relref "docs/getting-started/models" %}}) or configure LocalAI to pull the models from external sources, like Huggingface and configure the model.
-
-To do that, you can point LocalAI to an URL to a YAML configuration file - however - LocalAI does also have some popular model configuration embedded in the binary as well. Below you can find a list of the models configuration that LocalAI has pre-built, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}) on how to configure models from URLs.
-
-There are different categories of models: [LLMs]({{%relref "docs/features/text-generation" %}}), [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) , [Embeddings]({{%relref "docs/features/embeddings" %}}), [Audio to Text]({{%relref "docs/features/audio-to-text" %}}), and [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) depending on the backend being used and the model architecture.
-
-{{% alert icon="💡" %}}
-
-To customize the models, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}). For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI-examples/tree/main/configurations) and the configurations for the models below is available [here](https://github.com/mudler/LocalAI/tree/master/embedded/models).
-{{% /alert %}}
-
-{{< tabs tabTotal="3" >}}
-{{% tab tabName="CPU-only" %}}
-
-> 💡Don't need GPU acceleration? use the CPU images which are lighter and do not have Nvidia dependencies
-
-| Model | Category | Docker command |
-| --- | --- | --- |
-| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core phi-2``` |
-| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bakllava``` |
-| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.5``` |
-| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-mistral``` |
-| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-vicuna``` |
-| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mistral-openorca``` |
-| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bert-cpp``` |
-| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg all-minilm-l6-v2``` |
-| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core whisper-base``` |
-| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core rhasspy-voice-en-us-amy``` |
-| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg coqui``` |
-| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg bark``` |
-| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg vall-e-x``` |
-| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mixtral-instruct``` |
-| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core tinyllama-chat``` |
-| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core dolphin-2.5-mixtral-8x7b``` |
-| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only |
-| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | GPU-only |
-| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only |
-| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) (with transformers) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only |
-| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) (with llama.cpp) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core codellama-7b-gguf``` |
-| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core hermes-2-pro-mistral``` |
-{{% /tab %}}
-
-{{% tab tabName="GPU (CUDA 11)" %}}
-
-
-> To know which version of CUDA do you have available, you can check with `nvidia-smi` or `nvcc --version` see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}).
-
-| Model | Category | Docker command |
-| --- | --- | --- |
-| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core phi-2``` |
-| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bakllava``` |
-| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.5``` |
-| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-mistral``` |
-| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-vicuna``` |
-| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mistral-openorca``` |
-| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bert-cpp``` |
-| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 all-minilm-l6-v2``` |
-| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core whisper-base``` |
-| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core rhasspy-voice-en-us-amy``` |
-| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 coqui``` |
-| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 bark``` |
-| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 vall-e-x``` |
-| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mixtral-instruct``` |
-| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core tinyllama-chat``` |
-| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core dolphin-2.5-mixtral-8x7b``` |
-| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 mamba-chat``` |
-| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda11 animagine-xl``` |
-| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 transformers-tinyllama``` |
-| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 codellama-7b``` |
-| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core codellama-7b-gguf``` |
-| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core hermes-2-pro-mistral``` |
-{{% /tab %}}
-
-
-{{% tab tabName="GPU (CUDA 12)" %}}
-
-> To know which version of CUDA do you have available, you can check with `nvidia-smi` or `nvcc --version` see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}).
-
-| Model | Category | Docker command |
-| --- | --- | --- |
-| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core phi-2``` |
-| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bakllava``` |
-| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.5``` |
-| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-mistral``` |
-| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-vicuna``` |
-| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mistral-openorca``` |
-| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bert-cpp``` |
-| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 all-minilm-l6-v2``` |
-| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core whisper-base``` |
-| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core rhasspy-voice-en-us-amy``` |
-| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 coqui``` |
-| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 bark``` |
-| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 vall-e-x``` |
-| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mixtral-instruct``` |
-| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core tinyllama-chat``` |
-| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core dolphin-2.5-mixtral-8x7b``` |
-| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 mamba-chat``` |
-| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda12 animagine-xl``` |
-| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 transformers-tinyllama``` |
-| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 codellama-7b``` |
-| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core codellama-7b-gguf``` |
-| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core hermes-2-pro-mistral``` |
-{{% /tab %}}
-
-{{< /tabs >}}
-
-{{% alert icon="💡" %}}
-**Tip** You can actually specify multiple models to start an instance with the models loaded, for example to have both llava and phi-2 configured:
-
-```bash
-docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava phi-2
-```
-
-{{% /alert %}}
@@ -143,7 +143,7 @@ The AIO Images are inheriting the same environment variables as the base images
 | Variable | Default | Description |
 | ---------------------| ------- | ----------- |
 | `PROFILE` | Auto-detected | The size of the model to use. Available: `cpu`, `gpu-8g` |
-| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/advanced/run-other-models" %}})) |
+| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/getting-started/models" %}})) |


 ## Standard container images
@@ -1,72 +0,0 @@
-package embedded
-
-import (
-	"embed"
-	"fmt"
-	"slices"
-	"strings"
-
-	"github.com/mudler/LocalAI/pkg/downloader"
-	"github.com/rs/zerolog/log"
-
-	"github.com/mudler/LocalAI/pkg/assets"
-	"gopkg.in/yaml.v3"
-)
-
-var modelShorteners map[string]string
-
-//go:embed model_library.yaml
-var modelLibrary []byte
-
-//go:embed models/*
-var embeddedModels embed.FS
-
-func ModelShortURL(s string) string {
-	if _, ok := modelShorteners[s]; ok {
-		s = modelShorteners[s]
-	}
-
-	return s
-}
-
-func init() {
-	err := yaml.Unmarshal(modelLibrary, &modelShorteners)
-	if err != nil {
-		log.Error().Err(err).Msg("error while unmarshalling embedded modelLibrary")
-	}
-}
-
-func GetRemoteLibraryShorteners(url string, basePath string) (map[string]string, error) {
-	remoteLibrary := map[string]string{}
-	uri := downloader.URI(url)
-	err := uri.DownloadWithCallback(basePath, func(_ string, i []byte) error {
-		return yaml.Unmarshal(i, &remoteLibrary)
-	})
-	if err != nil {
-		return nil, fmt.Errorf("error downloading remote library: %s", err.Error())
-	}
-
-	return remoteLibrary, err
-}
-
-// ExistsInModelsLibrary checks if a model exists in the embedded models library
-func ExistsInModelsLibrary(s string) bool {
-	f := fmt.Sprintf("%s.yaml", s)
-
-	a := []string{}
-
-	for _, j := range assets.ListFiles(embeddedModels) {
-		a = append(a, strings.TrimPrefix(j, "models/"))
-	}
-
-	return slices.Contains(a, f)
-}
-
-// ResolveContent returns the content in the embedded model library
-func ResolveContent(s string) ([]byte, error) {
-	if ExistsInModelsLibrary(s) {
-		return embeddedModels.ReadFile(fmt.Sprintf("models/%s.yaml", s))
-	}
-
-	return nil, fmt.Errorf("cannot find model %s", s)
-}
@@ -1,9 +0,0 @@
-###
-###
-### This file contains the list of models that are available in the library
-### The URLs are automatically expanded when local-ai is being called with the key as argument
-###
-### For models with an entire YAML file to be embededd, put the file inside the `models`
-### directory, it will be automatically available with the file name as key (without the .yaml extension)
-
-phi-2: "github://mudler/LocalAI-examples/configurations/phi-2.yaml@main"
@@ -1,13 +0,0 @@
-name: all-minilm-l6-v2
-backend: sentencetransformers
-embeddings: true
-parameters:
-  model: all-MiniLM-L6-v2
-
-usage: |
-    You can test this model with curl like this:
-
-    curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
-      "input": "Your text string goes here",
-      "model": "all-minilm-l6-v2"
-    }'
@@ -1,17 +0,0 @@
-name: animagine-xl
-parameters:
-  model: Linaqruf/animagine-xl
-backend: diffusers
-f16: true
-diffusers:
-  scheduler_type: euler_a
-
-usage: |
-    curl http://localhost:8080/v1/images/generations \
-      -H "Content-Type: application/json" \
-      -d '{
-        "prompt": "<positive prompt>|<negative prompt>",
-        "model": "animagine-xl",
-        "step": 51,
-        "size": "1024x1024"
-      }'
@@ -1,40 +0,0 @@
-backend: llama-cpp
-context_size: 4096
-f16: true
-
-gpu_layers: 90
-mmap: true
-name: bakllava
-
-roles:
-  user: "USER:"
-  assistant: "ASSISTANT:"
-  system: "SYSTEM:"
-
-mmproj: bakllava-mmproj.gguf
-parameters:
-  model: bakllava.gguf
-  temperature: 0.2
-  top_k: 40
-  top_p: 0.95
-  seed: -1
-mirostat: 2
-mirostat_eta: 1.0
-mirostat_tau: 1.0
-
-template:
-  chat: |
-    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
-    {{.Input}}
-    ASSISTANT:
-
-download_files:
-- filename: bakllava.gguf
-  uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
-- filename: bakllava-mmproj.gguf
-  uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
-
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "bakllava",
-        "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
@@ -1,8 +0,0 @@
-usage: |
-    bark works without any configuration, to test it, you can run the following curl command:
-
-    curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
-        "backend": "bark",
-        "input":"Hello, this is a test!"
-    }' | aplay
-# TODO: This is a placeholder until we manage to pre-load HF/Transformers models
@@ -1,24 +0,0 @@
-backend: llama
-context_size: 8192
-f16: false
-gpu_layers: 90
-name: cerbero
-mmap: false
-parameters:
-  model: huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q8_0.gguf
-  top_k: 80
-  temperature: 0.2
-  top_p: 0.7
-template:
-  completion: "{{.Input}}"
-  chat: "Questa è una conversazione tra un umano ed un assistente AI.\n{{.Input}}\n[|Assistente|] "
-roles:
-  user: "[|Umano|] "
-  system: "[|Umano|] "
-  assistant: "[|Assistente|] "
-
-stopwords:
-  - "[|Umano|]"
-
-trimsuffix:
-  - "\n"
@@ -1,20 +0,0 @@
-name: codellama-7b-gguf
-backend: transformers
-parameters:
-  model: huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_K_M.gguf
-  temperature: 0.5
-  top_k: 40
-  seed: -1
-  top_p: 0.95
-mirostat: 2
-mirostat_eta: 1.0
-mirostat_tau: 1.0
-
-context_size: 4096
-f16: true
-gpu_layers: 90
-usage: |
-    curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
-        "model": "codellama-7b-gguf",
-        "prompt": "import socket\n\ndef ping_exponential_backoff(host: str):"
-    }'
@@ -1,14 +0,0 @@
-name: codellama-7b
-backend: transformers
-type: AutoModelForCausalLM
-parameters:
-  model: codellama/CodeLlama-7b-hf
-  temperature: 0.2
-  top_k: 40
-  top_p: 0.95
-
-usage: |
-    curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
-        "model": "codellama-7b",
-        "prompt": "import socket\n\ndef ping_exponential_backoff(host: str):"
-    }'
@@ -1,9 +0,0 @@
-usage: |
-    coqui works without any configuration, to test it, you can run the following curl command:
-
-    curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
-        "backend": "coqui",
-        "model": "tts_models/en/ljspeech/glow-tts",
-        "input":"Hello, this is a test!"
-    }'
-# TODO: This is a placeholder until we manage to pre-load HF/Transformers models
@@ -1,31 +0,0 @@
-name: dolphin-mixtral-8x7b
-mmap: true
-parameters:
-  model: huggingface://TheBloke/dolphin-2.5-mixtral-8x7b-GGUF/dolphin-2.5-mixtral-8x7b.Q2_K.gguf
-  temperature: 0.5
-  top_k: 40
-  top_p: 0.95
-  seed: -1
-mirostat: 2
-mirostat_eta: 1.0
-mirostat_tau: 1.0
-template:
-  chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
-    {{if .Content}}{{.Content}}{{end}}<|im_end|>
-  chat: |
-    {{.Input}}
-    <|im_start|>assistant
-  completion: |
-    {{.Input}}
-context_size: 4096
-f16: true
-stopwords:
-  - <|im_end|>
-gpu_layers: 90
-
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "dolphin-mixtral-8x7b",
-        "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-    }'
@@ -1,59 +0,0 @@
-name: hermes-2-pro-mistral
-mmap: true
-parameters:
-  model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
-
-template:
-  chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}
-    <tool_call>
-    {{- else if eq .RoleName "tool" }}
-    <tool_response>
-    {{- end }}
-    {{- if .Content}}
-    {{.Content }}
-    {{- end }}
-    {{- if .FunctionCall}}
-    {{toJson .FunctionCall}}
-    {{- end }}
-    {{- if .FunctionCall }}
-    </tool_call>
-    {{- else if eq .RoleName "tool" }}
-    </tool_response>
-    {{- end }}<|im_end|>
-  # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
-  function: |
-    <|im_start|>system
-    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
-    <tools>
-    {{range .Functions}}
-    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
-    {{end}}
-    </tools>
-    Use the following pydantic model json schema for each tool call you will make:
-    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
-    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
-    <tool_call>
-    {'arguments': <args-dict>, 'name': <function-name>}
-    </tool_call><|im_end|>
-    {{.Input -}}
-    <|im_start|>assistant
-    <tool_call>
-  chat: |
-    {{.Input -}}
-    <|im_start|>assistant
-  completion: |
-    {{.Input}}
-context_size: 4096
-f16: true
-stopwords:
-  - <|im_end|>
-  - <dummy32000>
-  - "\n</tool_call>"
-  - "\n\n\n"
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "hermes-2-pro-mistral",
-        "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-    }'
@@ -1,48 +0,0 @@
-name: llama3-8b-instruct
-mmap: true
-parameters:
-  model: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
-
-template:
-  chat_message: |
-    <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
-
-    {{ if .FunctionCall -}}
-    Function call:
-    {{ else if eq .RoleName "tool" -}}
-    Function response:
-    {{ end -}}
-    {{ if .Content -}}
-    {{.Content -}}
-    {{ else if .FunctionCall -}}
-    {{ toJson .FunctionCall -}}
-    {{ end -}}
-    <|eot_id|>
-  function: |
-    <|start_header_id|>system<|end_header_id|>
-
-    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
-    <tools>
-    {{range .Functions}}
-    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
-    {{end}}
-    </tools>
-    Use the following pydantic model json schema for each tool call you will make:
-    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-    Function call:
-  chat: |
-    <|begin_of_text|>{{.Input }}
-    <|start_header_id|>assistant<|end_header_id|>
-  completion: |
-    {{.Input}}
-context_size: 8192
-f16: true
-stopwords:
-  - <|im_end|>
-  - <dummy32000>
-  - "<|eot_id|>"
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "llama3-8b-instruct",
-        "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-    }'
@@ -1,33 +0,0 @@
-backend: llama-cpp
-context_size: 4096
-f16: true
-
-gpu_layers: 90
-mmap: true
-name: llava-1.5
-
-roles:
-  user: "USER:"
-  assistant: "ASSISTANT:"
-  system: "SYSTEM:"
-
-mmproj: llava-v1.5-7b-mmproj-Q8_0.gguf
-parameters:
-  model: llava-v1.5-7b-Q4_K.gguf
-
-template:
-  chat: |
-    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
-    {{.Input}}
-    ASSISTANT:
-
-download_files:
-- filename: llava-v1.5-7b-Q4_K.gguf
-  uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-Q4_K.gguf
-- filename: llava-v1.5-7b-mmproj-Q8_0.gguf
-  uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf
-
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "llava-1.5",
-        "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
@@ -1,33 +0,0 @@
-backend: llama-cpp
-context_size: 4096
-f16: true
-
-gpu_layers: 90
-mmap: true
-name: llava-1.6-mistral
-
-roles:
-  user: "USER:"
-  assistant: "ASSISTANT:"
-  system: "SYSTEM:"
-
-mmproj: llava-v1.6-7b-mmproj-f16.gguf
-parameters:
-  model: llava-v1.6-mistral-7b.gguf
-
-template:
-  chat: |
-    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
-    {{.Input}}
-    ASSISTANT:
-
-download_files:
-- filename: llava-v1.6-mistral-7b.gguf
-  uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q6_K.gguf
-- filename: llava-v1.6-7b-mmproj-f16.gguf
-  uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
-
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "llava-1.6-mistral",
-        "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
@@ -1,37 +0,0 @@
-backend: llama-cpp
-context_size: 4096
-f16: true
-
-gpu_layers: 90
-mmap: true
-name: llava-1.6-vicuna
-
-roles:
-  user: "USER:"
-  assistant: "ASSISTANT:"
-  system: "SYSTEM:"
-
-mmproj: mmproj-vicuna7b-f16.gguf
-parameters:
-  model: vicuna-7b-q5_k.gguf
-  temperature: 0.2
-  top_k: 40
-  top_p: 0.95
-  seed: -1
-
-template:
-  chat: |
-    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
-    {{.Input}}
-    ASSISTANT:
-
-download_files:
-- filename: vicuna-7b-q5_k.gguf
-  uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf
-- filename: mmproj-vicuna7b-f16.gguf
-  uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf
-
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "llava-1.6-vicuna",
-        "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
@@ -1,40 +0,0 @@
-backend: llama-cpp
-context_size: 4096
-f16: true
-
-gpu_layers: 90
-mmap: true
-name: llava
-
-roles:
-  user: "USER:"
-  assistant: "ASSISTANT:"
-  system: "SYSTEM:"
-
-mmproj: bakllava-mmproj.gguf
-parameters:
-  model: bakllava.gguf
-  temperature: 0.2
-  top_k: 40
-  top_p: 0.95
-  seed: -1
-mirostat: 2
-mirostat_eta: 1.0
-mirostat_tau: 1.0
-
-template:
-  chat: |
-    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
-    {{.Input}}
-    ASSISTANT:
-
-download_files:
-- filename: bakllava.gguf
-  uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
-- filename: bakllava-mmproj.gguf
-  uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
-
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "llava",
-        "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
@@ -1,21 +0,0 @@
-name: bagel
-backend: mamba
-parameters:
-  model: "jondurbin/bagel-dpo-2.8b-v0.2"
-systemPrompt: "You are a helpful, unbiased, uncensored assistant."
-template:
-  chat_message: |
-    {{if eq .RoleName "assistant"}}{{.Content}}{{else}}
-    [INST]
-    {{if .SystemPrompt}}{{.SystemPrompt}}{{else if eq .RoleName "system"}}<<SYS>>{{.Content}}<</SYS>>
-
-    {{else if .Content}}{{.Content}}{{end}}
-    [/INST]
-    {{end}}
-  completion: |
-    {{.Input}}
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "bagel",
-        "messages": [{"role": "user", "content": "how are you doing"}],
-    }'
@@ -1,28 +0,0 @@
-name: mamba-chat
-backend: mamba
-parameters:
-  model: "havenhq/mamba-chat"
-
-trimsuffix:
-  - <|endoftext|>
-
-# https://huggingface.co/HuggingFaceH4/zephyr-7b-beta/blob/main/tokenizer_config.json
-# "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
-template:
-  chat_message: |
-    {{if eq .RoleName "assistant"}}<|assistant|>{{else if eq .RoleName "system"}}<|system|>{{else if eq .RoleName "user"}}<|user|>{{end}}
-    {{if .Content}}{{.Content}}{{end}}
-    </s>
-
-  chat: |
-    {{.Input}}
-    <|assistant|>
-
-  completion: |
-    {{.Input}}
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "mamba-chat",
-        "messages": [{"role": "user", "content": "how are you doing"}],
-        "temperature": 0.7
-    }'
@@ -1,32 +0,0 @@
-name: mistral-openorca
-mmap: true
-parameters:
-  model: huggingface://TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q6_K.gguf
-  temperature: 0.2
-  top_k: 40
-  top_p: 0.95
-  seed: -1
-mirostat: 2
-mirostat_eta: 1.0
-mirostat_tau: 1.0
-
-template:
-  chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
-    {{if .Content}}{{.Content}}{{end}}
-    <|im_end|>
-  chat: |
-    {{.Input}}
-    <|im_start|>assistant
-  completion: |
-    {{.Input}}
-context_size: 4096
-f16: true
-stopwords:
-  - <|im_end|>
-  - <dummy32000>
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "mistral-openorca",
-        "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-    }'
@@ -1,25 +0,0 @@
-name: mixtral-instruct
-mmap: true
-parameters:
-  model: huggingface://TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/mixtral-8x7b-instruct-v0.1.Q2_K.gguf
-  temperature: 0.2
-  top_k: 40
-  seed: -1
-  top_p: 0.95
-mirostat: 2
-mirostat_eta: 1.0
-mirostat_tau: 1.0
-
-template:
-  chat: &chat |
-    [INST] {{.Input}} [/INST]
-  completion: *chat
-context_size: 4096
-f16: true
-gpu_layers: 90
-
-usage: |
-    curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
-        "model": "mixtral-instruct",
-        "prompt": "How are you doing?"
-    }'
@@ -1,25 +0,0 @@
-name: phi-2-chat
-mmap: true
-parameters:
-  model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
-
-template:
-  chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
-    {{if .Content}}{{.Content}}{{end}}
-    <|im_end|>
-  chat: |
-    {{.Input}}
-    <|im_start|>assistant
-  completion: |
-    {{.Input}}
-context_size: 4096
-f16: true
-stopwords:
-  - <|im_end|>
-  - <dummy32000>
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "phi-2-chat",
-        "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-    }'
@@ -1,30 +0,0 @@
-name: phi-2-orange
-mmap: true
-parameters:
-  model: huggingface://l3utterfly/phi-2-orange-GGUF/phi-2-orange.Q6_K.gguf
-
-template:
-  chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
-    {{if .Content}}{{.Content}}{{end}}
-    <|im_end|>
-  chat: |
-    {{.Input}}
-    <|im_start|>assistant
-  completion: |
-    {{.Input}}
-context_size: 4096
-f16: true
-stopwords:
-  - <|im_end|>
-  - <dummy32000>
-
-description: |
-    This model is a chatbot that can be used for general conversation.
-    [Model card](https://huggingface.co/TheBloke/phi-2-orange-GGUF)
-
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "phi-2-orange",
-        "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-    }'
@@ -1,13 +0,0 @@
-name: voice-en-us-amy-low
-download_files:
-- filename: voice-en-us-amy-low.tar.gz
-  uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
-
-
-usage: |
-    To test if this model works as expected, you can use the following curl command:
-
-    curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
-        "model":"en-us-amy-low.onnx",
-        "input": "Hi, this is a test."
-    }'
@@ -1,29 +0,0 @@
-name: tinyllama-chat
-mmap: true
-parameters:
-  model: huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q8_0.gguf
-  temperature: 0.2
-  top_k: 40
-  seed: -1
-  top_p: 0.95
-template:
-  chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
-    {{if .Content}}{{.Content}}{{end}}<|im_end|>
-  chat: |
-    {{.Input}}
-    <|im_start|>assistant
-
-  completion: |
-    {{.Input}}
-context_size: 4096
-f16: true
-stopwords:
-  - <|im_end|>
-gpu_layers: 90
-
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "tinyllama-chat",
-        "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-    }'
@@ -1,31 +0,0 @@
-name: tinyllama-chat
-backend: transformers
-type: AutoModelForCausalLM
-
-parameters:
-  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
-  temperature: 0.2
-  top_k: 40
-  top_p: 0.95
-  max_tokens: 4096
-
-template:
-  chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
-    {{if .Content}}{{.Content}}{{end}}<|im_end|>
-  chat: |
-    {{.Input}}
-    <|im_start|>assistant
-
-  completion: |
-    {{.Input}}
-
-stopwords:
-  - <|im_end|>
-
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "tinyllama-chat",
-        "messages": [{"role": "user", "content": "Say this is a test!"}],
-        "temperature": 0.7
-    }'
@@ -1,8 +0,0 @@
-usage: |
-    Vall-e-x works without any configuration, to test it, you can run the following curl command:
-
-    curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
-        "backend": "vall-e-x",
-        "input":"Hello, this is a test!"
-    }' | aplay
-# TODO: This is a placeholder until we manage to pre-load HF/Transformers models
@@ -1,18 +0,0 @@
-name: whisper
-backend: whisper
-parameters:
-  model: ggml-whisper-base.bin
-
-usage: |
-    ## example audio file
-    wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
-
-    ## Send the example audio file to the transcriptions endpoint
-    curl http://localhost:8080/v1/audio/transcriptions \
-        -H "Content-Type: multipart/form-data" \
-        -F file="@$PWD/gb1.ogg" -F model="whisper"
-
-download_files:
-- filename: "ggml-whisper-base.bin"
-  sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
-  uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
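The removed whisper config pins its weights file to a sha256 checksum via download_files. As a side note, here is a minimal sketch of verifying that checksum by hand after downloading the file; the local path models/ggml-whisper-base.bin is an assumption for illustration, not LocalAI's own layout or verification code:

package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"
	"os"
)

// expected is the sha256 value listed in the removed whisper config.
const expected = "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"

func main() {
	f, err := os.Open("models/ggml-whisper-base.bin") // assumed download location
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// Stream the file through a sha256 hasher and compare against the pinned value.
	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		panic(err)
	}
	if got := hex.EncodeToString(h.Sum(nil)); got != expected {
		fmt.Println("checksum mismatch:", got)
		return
	}
	fmt.Println("checksum OK")
}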
@@ -9,7 +9,6 @@ import (
 
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
-	"github.com/mudler/LocalAI/embedded"
 	"github.com/mudler/LocalAI/pkg/downloader"
 	"github.com/mudler/LocalAI/pkg/utils"
 	"github.com/rs/zerolog/log"
@@ -18,42 +17,17 @@ import (
 // InstallModels will preload models from the given list of URLs and galleries
 // It will download the model if it is not already present in the model path
 // It will also try to resolve if the model is an embedded model YAML configuration
-func InstallModels(galleries []config.Gallery, modelLibraryURL string, modelPath string, enforceScan bool, downloadStatus func(string, string, string, float64), models ...string) error {
+func InstallModels(galleries []config.Gallery, modelPath string, enforceScan bool, downloadStatus func(string, string, string, float64), models ...string) error {
 	// create an error that groups all errors
 	var err error
 
-	lib, _ := embedded.GetRemoteLibraryShorteners(modelLibraryURL, modelPath)
-
 	for _, url := range models {
 		// As a best effort, try to resolve the model from the remote library
 		// if it's not resolved we try with the other method below
-		if modelLibraryURL != "" {
-			if lib[url] != "" {
-				log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url])
-				url = lib[url]
-			}
-		}
-
-		url = embedded.ModelShortURL(url)
 		uri := downloader.URI(url)
 
 		switch {
-		case embedded.ExistsInModelsLibrary(url):
-			modelYAML, e := embedded.ResolveContent(url)
-			// If we resolve something, just save it to disk and continue
-			if e != nil {
-				log.Error().Err(e).Msg("error resolving model content")
-				err = errors.Join(err, e)
-				continue
-			}
-
-			log.Debug().Msgf("[startup] resolved embedded model: %s", url)
-			md5Name := utils.MD5(url)
-			modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
-			if e := os.WriteFile(modelDefinitionFilePath, modelYAML, 0600); err != nil {
-				log.Error().Err(e).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
-				err = errors.Join(err, e)
-			}
 		case uri.LooksLikeOCI():
 			log.Debug().Msgf("[startup] resolved OCI model to download: %s", url)
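With the remote-library and embedded-shortcut paths gone, callers pass the models directory straight after the galleries. A minimal sketch of the new call shape, reusing the phi-2 configuration URL exercised by the tests below; the ./models directory is an assumption for illustration:

package main

import (
	"log"

	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/pkg/startup"
)

func main() {
	// New signature: galleries, model path, scan enforcement, optional
	// progress callback (nil here), then one or more model URLs.
	url := "https://raw.githubusercontent.com/mudler/LocalAI-examples/main/configurations/phi-2.yaml"
	if err := startup.InstallModels([]config.Gallery{}, "./models", true, nil, url); err != nil {
		log.Fatal(err)
	}
}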
@@ -7,7 +7,6 @@ import (
 
 	"github.com/mudler/LocalAI/core/config"
 	. "github.com/mudler/LocalAI/pkg/startup"
-	"github.com/mudler/LocalAI/pkg/utils"
 
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
@@ -16,29 +15,13 @@ import (
 var _ = Describe("Preload test", func() {
 
 	Context("Preloading from strings", func() {
-		It("loads from remote url", func() {
-			tmpdir, err := os.MkdirTemp("", "")
-			Expect(err).ToNot(HaveOccurred())
-			libraryURL := "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml"
-			fileName := fmt.Sprintf("%s.yaml", "phi-2")
-
-			InstallModels([]config.Gallery{}, libraryURL, tmpdir, true, nil, "phi-2")
-
-			resultFile := filepath.Join(tmpdir, fileName)
-
-			content, err := os.ReadFile(resultFile)
-			Expect(err).ToNot(HaveOccurred())
-
-			Expect(string(content)).To(ContainSubstring("name: phi-2"))
-		})
-
 		It("loads from embedded full-urls", func() {
 			tmpdir, err := os.MkdirTemp("", "")
 			Expect(err).ToNot(HaveOccurred())
 			url := "https://raw.githubusercontent.com/mudler/LocalAI-examples/main/configurations/phi-2.yaml"
 			fileName := fmt.Sprintf("%s.yaml", "phi-2")
 
-			InstallModels([]config.Gallery{}, "", tmpdir, true, nil, url)
+			InstallModels([]config.Gallery{}, tmpdir, true, nil, url)
 
 			resultFile := filepath.Join(tmpdir, fileName)
 
@@ -47,45 +30,13 @@ var _ = Describe("Preload test", func() {
 
 			Expect(string(content)).To(ContainSubstring("name: phi-2"))
 		})
-		It("loads from embedded short-urls", func() {
-			tmpdir, err := os.MkdirTemp("", "")
-			Expect(err).ToNot(HaveOccurred())
-			url := "phi-2"
-
-			InstallModels([]config.Gallery{}, "", tmpdir, true, nil, url)
-
-			entry, err := os.ReadDir(tmpdir)
-			Expect(err).ToNot(HaveOccurred())
-			Expect(entry).To(HaveLen(1))
-			resultFile := entry[0].Name()
-
-			content, err := os.ReadFile(filepath.Join(tmpdir, resultFile))
-			Expect(err).ToNot(HaveOccurred())
-
-			Expect(string(content)).To(ContainSubstring("name: phi-2"))
-		})
-		It("loads from embedded models", func() {
-			tmpdir, err := os.MkdirTemp("", "")
-			Expect(err).ToNot(HaveOccurred())
-			url := "mistral-openorca"
-			fileName := fmt.Sprintf("%s.yaml", utils.MD5(url))
-
-			InstallModels([]config.Gallery{}, "", tmpdir, true, nil, url)
-
-			resultFile := filepath.Join(tmpdir, fileName)
-
-			content, err := os.ReadFile(resultFile)
-			Expect(err).ToNot(HaveOccurred())
-
-			Expect(string(content)).To(ContainSubstring("name: mistral-openorca"))
-		})
 		It("downloads from urls", func() {
 			tmpdir, err := os.MkdirTemp("", "")
 			Expect(err).ToNot(HaveOccurred())
 			url := "huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
 			fileName := fmt.Sprintf("%s.gguf", "tinyllama-1.1b-chat-v0.3.Q2_K")
 
-			err = InstallModels([]config.Gallery{}, "", tmpdir, false, nil, url)
+			err = InstallModels([]config.Gallery{}, tmpdir, false, nil, url)
 			Expect(err).ToNot(HaveOccurred())
 
 			resultFile := filepath.Join(tmpdir, fileName)
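The tests above pass nil for the downloadStatus argument. For completeness, a minimal sketch of supplying a progress callback instead; the interpretation of the three string parameters is an assumption for illustration (the signature only fixes their types), and the huggingface:// URL is the one used in the download test:

package main

import (
	"log"

	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/pkg/startup"
)

func main() {
	// Hypothetical reading of the callback arguments: a file name, downloaded
	// bytes, total bytes, and a completion percentage.
	progress := func(name, current, total string, percent float64) {
		log.Printf("downloading %s: %s/%s (%.1f%%)", name, current, total, percent)
	}

	url := "huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
	if err := startup.InstallModels([]config.Gallery{}, "./models", false, progress, url); err != nil {
		log.Fatal(err)
	}
}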