Merge branch 'master' into fix-selinux-nvidia-smi

Alessandro Pirastru 2025-04-27 14:27:03 +02:00 committed by GitHub
commit 40883c410d
30 changed files with 560 additions and 188 deletions


@@ -45,7 +45,7 @@ jobs:
           - build-type: 'hipblas'
             platforms: 'linux/amd64'
             tag-latest: 'auto'
-            tag-suffix: '-hipblas-ffmpeg'
+            tag-suffix: '-hipblas'
             ffmpeg: 'true'
             image-type: 'extras'
             aio: "-aio-gpu-hipblas"
@@ -58,17 +58,7 @@ jobs:
           - build-type: 'hipblas'
             platforms: 'linux/amd64'
             tag-latest: 'false'
-            tag-suffix: '-hipblas'
-            ffmpeg: 'false'
-            image-type: 'extras'
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-hipblas-ffmpeg-core'
+            tag-suffix: '-hipblas-core'
             ffmpeg: 'true'
             image-type: 'core'
             base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -76,16 +66,6 @@ jobs:
             runs-on: 'arc-runner-set'
             makeflags: "--jobs=3 --output-sync=target"
             latest-image: 'latest-gpu-hipblas-core'
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-hipblas-core'
-            ffmpeg: 'false'
-            image-type: 'core'
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
   self-hosted-jobs:
     uses: ./.github/workflows/image_build.yml
     with:
@@ -115,54 +95,21 @@ jobs:
       max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
       matrix:
         include:
-          # Extra images
           - build-type: ''
-            #platforms: 'linux/amd64,linux/arm64'
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: ''
-            ffmpeg: ''
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: ''
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-ffmpeg'
             ffmpeg: 'true'
             image-type: 'extras'
             runs-on: 'arc-runner-set'
             base-image: "ubuntu:22.04"
             makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-cublas-cuda11'
-            ffmpeg: ''
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-cublas-cuda12'
-            ffmpeg: ''
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
             cuda-minor-version: "7"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
-            tag-suffix: '-cublas-cuda11-ffmpeg'
+            tag-suffix: '-cublas-cuda11'
             ffmpeg: 'true'
             image-type: 'extras'
             runs-on: 'arc-runner-set'
@@ -176,7 +123,7 @@ jobs:
             cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
-            tag-suffix: '-cublas-cuda12-ffmpeg'
+            tag-suffix: '-cublas-cuda12'
             ffmpeg: 'true'
             image-type: 'extras'
             runs-on: 'arc-runner-set'
@@ -185,22 +132,12 @@ jobs:
             latest-image: 'latest-gpu-nvidia-cuda-12'
             latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
             makeflags: "--jobs=3 --output-sync=target"
-          - build-type: ''
-            #platforms: 'linux/amd64,linux/arm64'
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: ''
-            ffmpeg: ''
-            image-type: 'extras'
-            base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'sycl_f16'
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
             grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f16-ffmpeg'
+            tag-suffix: '-sycl-f16'
             ffmpeg: 'true'
             image-type: 'extras'
             runs-on: 'arc-runner-set'
@@ -213,7 +150,7 @@ jobs:
             tag-latest: 'auto'
             base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
             grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f32-ffmpeg'
+            tag-suffix: '-sycl-f32'
             ffmpeg: 'true'
             image-type: 'extras'
             runs-on: 'arc-runner-set'
@@ -228,26 +165,6 @@ jobs:
             base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
             grpc-base-image: "ubuntu:22.04"
             tag-suffix: '-sycl-f16-core'
-            ffmpeg: 'false'
-            image-type: 'core'
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'sycl_f32'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f32-core'
-            ffmpeg: 'false'
-            image-type: 'core'
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'sycl_f16'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f16-ffmpeg-core'
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'arc-runner-set'
@@ -258,7 +175,7 @@ jobs:
             tag-latest: 'false'
             base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
             grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f32-ffmpeg-core'
+            tag-suffix: '-sycl-f32-core'
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'arc-runner-set'
@@ -296,7 +213,7 @@ jobs:
           - build-type: ''
             platforms: 'linux/amd64,linux/arm64'
             tag-latest: 'auto'
-            tag-suffix: '-ffmpeg-core'
+            tag-suffix: '-core'
             ffmpeg: 'true'
             image-type: 'core'
             base-image: "ubuntu:22.04"
@@ -312,30 +229,6 @@ jobs:
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda11-core'
-            ffmpeg: ''
-            image-type: 'core'
-            base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=4 --output-sync=target"
-            skip-drivers: 'false'
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-cublas-cuda12-core'
-            ffmpeg: ''
-            image-type: 'core'
-            base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=4 --output-sync=target"
-            skip-drivers: 'false'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-cublas-cuda11-ffmpeg-core'
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'arc-runner-set'
@@ -348,7 +241,7 @@ jobs:
             cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'false'
-            tag-suffix: '-cublas-cuda12-ffmpeg-core'
+            tag-suffix: '-cublas-cuda12-core'
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'arc-runner-set'
@@ -359,7 +252,7 @@ jobs:
           - build-type: 'vulkan'
             platforms: 'linux/amd64'
             tag-latest: 'false'
-            tag-suffix: '-vulkan-ffmpeg-core'
+            tag-suffix: '-vulkan-core'
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'arc-runner-set'


@@ -6,7 +6,7 @@ BINARY_NAME=local-ai
 DETECT_LIBS?=true
 
 # llama.cpp versions
-CPPLLAMA_VERSION?=295354ea6848a77bdee204ee1c971d9b92ffcca9
+CPPLLAMA_VERSION?=77d5e9a76a7b4a8a7c5bf9cf6ebef91860123cba
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp


@@ -14,6 +14,7 @@ service Backend {
   rpc PredictStream(PredictOptions) returns (stream Reply) {}
   rpc Embedding(PredictOptions) returns (EmbeddingResult) {}
   rpc GenerateImage(GenerateImageRequest) returns (Result) {}
+  rpc GenerateVideo(GenerateVideoRequest) returns (Result) {}
   rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
   rpc TTS(TTSRequest) returns (Result) {}
   rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
@@ -301,6 +302,19 @@ message GenerateImageRequest {
   int32 CLIPSkip = 11;
 }
 
+message GenerateVideoRequest {
+  string prompt = 1;
+  string start_image = 2; // Path or base64 encoded image for the start frame
+  string end_image = 3;   // Path or base64 encoded image for the end frame
+  int32 width = 4;
+  int32 height = 5;
+  int32 num_frames = 6;   // Number of frames to generate
+  int32 fps = 7;          // Frames per second
+  int32 seed = 8;
+  float cfg_scale = 9;    // Classifier-free guidance scale
+  string dst = 10;        // Output path for the generated video
+}
+
 message TTSRequest {
   string text = 1;
   string model = 2;
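As a quick orientation for the new RPC, here is a sketch of a fully populated request as a Go client might build it. The field names assume the usual protoc-gen-go mapping of the snake_case proto fields above, and all values are placeholders:

```go
package videoexample

import "github.com/mudler/LocalAI/pkg/grpc/proto"

// buildVideoRequest illustrates every field of GenerateVideoRequest;
// the values are made up for the example.
func buildVideoRequest() *proto.GenerateVideoRequest {
	return &proto.GenerateVideoRequest{
		Prompt:     "a timelapse of clouds over mountains",
		StartImage: "/tmp/start.png", // path or base64-encoded image
		Width:      512,
		Height:     512,
		NumFrames:  24,
		Fps:        8,
		Seed:       42,
		CfgScale:   7.0,
		Dst:        "/tmp/out.mp4", // where the backend should write the result
	}
}
```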


@@ -43,18 +43,12 @@ func New(opts ...config.AppOption) (*Application, error) {
     if err != nil {
         return nil, fmt.Errorf("unable to create ModelPath: %q", err)
     }
-    if options.ImageDir != "" {
-        err := os.MkdirAll(options.ImageDir, 0750)
+    if options.GeneratedContentDir != "" {
+        err := os.MkdirAll(options.GeneratedContentDir, 0750)
         if err != nil {
             return nil, fmt.Errorf("unable to create ImageDir: %q", err)
         }
     }
-    if options.AudioDir != "" {
-        err := os.MkdirAll(options.AudioDir, 0750)
-        if err != nil {
-            return nil, fmt.Errorf("unable to create AudioDir: %q", err)
-        }
-    }
     if options.UploadDir != "" {
         err := os.MkdirAll(options.UploadDir, 0750)
         if err != nil {


@@ -35,12 +35,17 @@ func SoundGeneration(
         return "", nil, fmt.Errorf("could not load sound generation model")
     }
 
-    if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
+    if err := os.MkdirAll(appConfig.GeneratedContentDir, 0750); err != nil {
         return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
     }
 
-    fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "sound_generation", ".wav")
-    filePath := filepath.Join(appConfig.AudioDir, fileName)
+    audioDir := filepath.Join(appConfig.GeneratedContentDir, "audio")
+    if err := os.MkdirAll(audioDir, 0750); err != nil {
+        return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
+    }
+
+    fileName := utils.GenerateUniqueFileName(audioDir, "sound_generation", ".wav")
+    filePath := filepath.Join(audioDir, fileName)
 
     res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{
         Text: text,


@@ -32,12 +32,13 @@ func ModelTTS(
         return "", nil, fmt.Errorf("could not load tts model %q", backendConfig.Model)
     }
 
-    if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
+    audioDir := filepath.Join(appConfig.GeneratedContentDir, "audio")
+    if err := os.MkdirAll(audioDir, 0750); err != nil {
         return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
     }
 
-    fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
-    filePath := filepath.Join(appConfig.AudioDir, fileName)
+    fileName := utils.GenerateUniqueFileName(audioDir, "tts", ".wav")
+    filePath := filepath.Join(audioDir, fileName)
 
     // We join the model name to the model path here. This seems to only be done for TTS and is HIGHLY suspect.
     // This should be addressed in a follow up PR soon.

core/backend/video.go (new file, +36 lines)

@@ -0,0 +1,36 @@
package backend

import (
    "github.com/mudler/LocalAI/core/config"
    "github.com/mudler/LocalAI/pkg/grpc/proto"
    model "github.com/mudler/LocalAI/pkg/model"
)

func VideoGeneration(height, width int32, prompt, startImage, endImage, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {

    opts := ModelOptions(backendConfig, appConfig)
    inferenceModel, err := loader.Load(
        opts...,
    )
    if err != nil {
        return nil, err
    }
    defer loader.Close()

    fn := func() error {
        _, err := inferenceModel.GenerateVideo(
            appConfig.Context,
            &proto.GenerateVideoRequest{
                Height:     height,
                Width:      width,
                Prompt:     prompt,
                StartImage: startImage,
                EndImage:   endImage,
                Dst:        dst,
            })
        return err
    }

    return fn, nil
}
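For context, a minimal sketch of how a caller might drive this helper (the HTTP endpoint added later in this commit follows the same pattern). The model loader, backend config and application config are assumed to be already initialized, and the prompt and output path are placeholders:

```go
package videoexample

import (
	"github.com/mudler/LocalAI/core/backend"
	"github.com/mudler/LocalAI/core/config"
	model "github.com/mudler/LocalAI/pkg/model"
)

// generateClip asks the backend for a short clip and blocks until it is written.
func generateClip(ml *model.ModelLoader, cfg config.BackendConfig, appConfig *config.ApplicationConfig) error {
	fn, err := backend.VideoGeneration(
		512, 512,                   // height, width
		"a sailing boat at sunset", // prompt (placeholder)
		"", "",                     // optional start/end images (path or base64)
		"/tmp/out.mp4",             // destination file (placeholder)
		ml, cfg, appConfig,
	)
	if err != nil {
		return err
	}
	// The returned closure performs the actual gRPC call when invoked.
	return fn()
}
```

Returning a closure instead of generating immediately lets the caller prepare temporary files and decide when the gRPC call actually runs.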


@@ -21,8 +21,7 @@ type RunCMD struct {
     ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
     BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
-    ImagePath string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"`
-    AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. piper)" group:"storage"`
+    GeneratedContentPath string `env:"LOCALAI_GENERATED_CONTENT_PATH,GENERATED_CONTENT_PATH" type:"path" default:"/tmp/generated/content" help:"Location for generated content (e.g. images, audio, videos)" group:"storage"`
     UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
     ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"`
     LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"`
@@ -81,8 +80,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
         config.WithModelPath(r.ModelsPath),
         config.WithContextSize(r.ContextSize),
         config.WithDebug(zerolog.GlobalLevel() <= zerolog.DebugLevel),
-        config.WithImageDir(r.ImagePath),
-        config.WithAudioDir(r.AudioPath),
+        config.WithGeneratedContentDir(r.GeneratedContentPath),
         config.WithUploadDir(r.UploadPath),
         config.WithConfigsDir(r.ConfigPath),
         config.WithDynamicConfigDir(r.LocalaiConfigDir),


@@ -70,7 +70,7 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
     opts := &config.ApplicationConfig{
         ModelPath:            t.ModelsPath,
         Context:              context.Background(),
-        AudioDir:             outputDir,
+        GeneratedContentDir:  outputDir,
         AssetsDestination:    t.BackendAssetsPath,
         ExternalGRPCBackends: externalBackends,
     }


@@ -36,10 +36,10 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error {
     text := strings.Join(t.Text, " ")
 
     opts := &config.ApplicationConfig{
         ModelPath:           t.ModelsPath,
         Context:             context.Background(),
-        AudioDir:            outputDir,
+        GeneratedContentDir: outputDir,
         AssetsDestination:   t.BackendAssetsPath,
     }
 
     ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)


@@ -19,20 +19,21 @@ type ApplicationConfig struct {
     UploadLimitMB, Threads, ContextSize int
     F16 bool
     Debug bool
-    ImageDir string
-    AudioDir string
-    UploadDir string
-    ConfigsDir string
-    DynamicConfigsDir string
-    DynamicConfigsDirPollInterval time.Duration
-    CORS bool
-    CSRF bool
-    PreloadJSONModels string
-    PreloadModelsFromPath string
-    CORSAllowOrigins string
-    ApiKeys []string
-    P2PToken string
-    P2PNetworkID string
+    GeneratedContentDir string
+
+    ConfigsDir string
+    UploadDir string
+
+    DynamicConfigsDir string
+    DynamicConfigsDirPollInterval time.Duration
+    CORS bool
+    CSRF bool
+    PreloadJSONModels string
+    PreloadModelsFromPath string
+    CORSAllowOrigins string
+    ApiKeys []string
+    P2PToken string
+    P2PNetworkID string
 
     DisableWebUI bool
     EnforcePredownloadScans bool
@@ -279,15 +280,9 @@ func WithDebug(debug bool) AppOption {
     }
 }
 
-func WithAudioDir(audioDir string) AppOption {
+func WithGeneratedContentDir(generatedContentDir string) AppOption {
     return func(o *ApplicationConfig) {
-        o.AudioDir = audioDir
-    }
-}
-
-func WithImageDir(imageDir string) AppOption {
-    return func(o *ApplicationConfig) {
-        o.ImageDir = imageDir
+        o.GeneratedContentDir = generatedContentDir
     }
 }


@@ -436,18 +436,19 @@ func (c *BackendConfig) HasTemplate() bool {
 type BackendConfigUsecases int
 
 const (
-    FLAG_ANY              BackendConfigUsecases = 0b00000000000
-    FLAG_CHAT             BackendConfigUsecases = 0b00000000001
-    FLAG_COMPLETION       BackendConfigUsecases = 0b00000000010
-    FLAG_EDIT             BackendConfigUsecases = 0b00000000100
-    FLAG_EMBEDDINGS       BackendConfigUsecases = 0b00000001000
-    FLAG_RERANK           BackendConfigUsecases = 0b00000010000
-    FLAG_IMAGE            BackendConfigUsecases = 0b00000100000
-    FLAG_TRANSCRIPT       BackendConfigUsecases = 0b00001000000
-    FLAG_TTS              BackendConfigUsecases = 0b00010000000
-    FLAG_SOUND_GENERATION BackendConfigUsecases = 0b00100000000
-    FLAG_TOKENIZE         BackendConfigUsecases = 0b01000000000
-    FLAG_VAD              BackendConfigUsecases = 0b10000000000
+    FLAG_ANY              BackendConfigUsecases = 0b000000000000
+    FLAG_CHAT             BackendConfigUsecases = 0b000000000001
+    FLAG_COMPLETION       BackendConfigUsecases = 0b000000000010
+    FLAG_EDIT             BackendConfigUsecases = 0b000000000100
+    FLAG_EMBEDDINGS       BackendConfigUsecases = 0b000000001000
+    FLAG_RERANK           BackendConfigUsecases = 0b000000010000
+    FLAG_IMAGE            BackendConfigUsecases = 0b000000100000
+    FLAG_TRANSCRIPT       BackendConfigUsecases = 0b000001000000
+    FLAG_TTS              BackendConfigUsecases = 0b000010000000
+    FLAG_SOUND_GENERATION BackendConfigUsecases = 0b000100000000
+    FLAG_TOKENIZE         BackendConfigUsecases = 0b001000000000
+    FLAG_VAD              BackendConfigUsecases = 0b010000000000
+    FLAG_VIDEO            BackendConfigUsecases = 0b100000000000
 
     // Common Subsets
     FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
@@ -468,6 +469,7 @@ func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
         "FLAG_TOKENIZE": FLAG_TOKENIZE,
         "FLAG_VAD":      FLAG_VAD,
         "FLAG_LLM":      FLAG_LLM,
+        "FLAG_VIDEO":    FLAG_VIDEO,
     }
 }
@@ -532,6 +534,17 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
             return false
         }
     }
+    if (u & FLAG_VIDEO) == FLAG_VIDEO {
+        videoBackends := []string{"diffusers", "stablediffusion"}
+        if !slices.Contains(videoBackends, c.Backend) {
+            return false
+        }
+        if c.Backend == "diffusers" && c.Diffusers.PipelineType == "" {
+            return false
+        }
+    }
     if (u & FLAG_RERANK) == FLAG_RERANK {
         if c.Backend != "rerankers" {
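Since each constant above occupies a single bit, usecase sets compose with bitwise OR and are tested with the same mask-and-compare that `GuessUsecases` uses. A small self-contained illustration, with the constant values copied from the block above:

```go
package main

import "fmt"

// Values copied from the const block above, for illustration only.
const (
	FLAG_CHAT  = 0b000000000001
	FLAG_IMAGE = 0b000000100000
	FLAG_VIDEO = 0b100000000000
)

func main() {
	// A backend advertising both image and video generation:
	usecases := FLAG_IMAGE | FLAG_VIDEO

	// The per-flag containment test used by GuessUsecases:
	fmt.Println(usecases&FLAG_VIDEO == FLAG_VIDEO) // true
	fmt.Println(usecases&FLAG_CHAT == FLAG_CHAT)   // false
}
```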


@@ -5,6 +5,8 @@ import (
     "errors"
     "fmt"
     "net/http"
+    "os"
+    "path/filepath"
 
     "github.com/dave-gray101/v2keyauth"
     "github.com/mudler/LocalAI/pkg/utils"
@@ -153,12 +155,19 @@ func API(application *application.Application) (*fiber.App, error) {
         Browse: true,
     }))
 
-    if application.ApplicationConfig().ImageDir != "" {
-        router.Static("/generated-images", application.ApplicationConfig().ImageDir)
-    }
-
-    if application.ApplicationConfig().AudioDir != "" {
-        router.Static("/generated-audio", application.ApplicationConfig().AudioDir)
+    if application.ApplicationConfig().GeneratedContentDir != "" {
+        os.MkdirAll(application.ApplicationConfig().GeneratedContentDir, 0750)
+        audioPath := filepath.Join(application.ApplicationConfig().GeneratedContentDir, "audio")
+        imagePath := filepath.Join(application.ApplicationConfig().GeneratedContentDir, "images")
+        videoPath := filepath.Join(application.ApplicationConfig().GeneratedContentDir, "videos")
+
+        os.MkdirAll(audioPath, 0750)
+        os.MkdirAll(imagePath, 0750)
+        os.MkdirAll(videoPath, 0750)
+        router.Static("/generated-audio", audioPath)
+        router.Static("/generated-images", imagePath)
+        router.Static("/generated-videos", videoPath)
     }
 
     // Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration


@@ -629,8 +629,7 @@ var _ = Describe("API test", func() {
             application, err := application.New(
                 append(commonOpts,
                     config.WithContext(c),
-                    config.WithAudioDir(tmpdir),
-                    config.WithImageDir(tmpdir),
+                    config.WithGeneratedContentDir(tmpdir),
                     config.WithGalleries(galleries),
                     config.WithModelPath(modelDir),
                     config.WithBackendAssets(backendAssets),


@@ -0,0 +1,205 @@
package localai

import (
    "bufio"
    "encoding/base64"
    "encoding/json"
    "fmt"
    "io"
    "net/http"
    "os"
    "path/filepath"
    "strings"
    "time"

    "github.com/google/uuid"
    "github.com/mudler/LocalAI/core/config"
    "github.com/mudler/LocalAI/core/http/middleware"
    "github.com/mudler/LocalAI/core/schema"

    "github.com/mudler/LocalAI/core/backend"

    "github.com/gofiber/fiber/v2"
    model "github.com/mudler/LocalAI/pkg/model"
    "github.com/rs/zerolog/log"
)

func downloadFile(url string) (string, error) {
    // Get the data
    resp, err := http.Get(url)
    if err != nil {
        return "", err
    }
    defer resp.Body.Close()

    // Create the file
    out, err := os.CreateTemp("", "video")
    if err != nil {
        return "", err
    }
    defer out.Close()

    // Write the body to file
    _, err = io.Copy(out, resp.Body)
    return out.Name(), err
}

//

/*
*
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "A cute baby sea otter",
"n": 1,
"size": "512x512"
}'
*
*/
// VideoEndpoint
// @Summary Creates a video given a prompt.
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /video [post]
func VideoEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
    return func(c *fiber.Ctx) error {
        input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.VideoRequest)
        if !ok || input.Model == "" {
            log.Error().Msg("Video Endpoint - Invalid Input")
            return fiber.ErrBadRequest
        }

        config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
        if !ok || config == nil {
            log.Error().Msg("Video Endpoint - Invalid Config")
            return fiber.ErrBadRequest
        }

        src := ""
        if input.StartImage != "" {
            var fileData []byte
            var err error
            // check if input.File is an URL, if so download it and save it
            // to a temporary file
            if strings.HasPrefix(input.StartImage, "http://") || strings.HasPrefix(input.StartImage, "https://") {
                out, err := downloadFile(input.StartImage)
                if err != nil {
                    return fmt.Errorf("failed downloading file:%w", err)
                }
                defer os.RemoveAll(out)

                fileData, err = os.ReadFile(out)
                if err != nil {
                    return fmt.Errorf("failed reading file:%w", err)
                }
            } else {
                // base 64 decode the file and write it somewhere
                // that we will cleanup
                fileData, err = base64.StdEncoding.DecodeString(input.StartImage)
                if err != nil {
                    return err
                }
            }

            // Create a temporary file
            outputFile, err := os.CreateTemp(appConfig.GeneratedContentDir, "b64")
            if err != nil {
                return err
            }
            // write the base64 result
            writer := bufio.NewWriter(outputFile)
            _, err = writer.Write(fileData)
            if err != nil {
                outputFile.Close()
                return err
            }
            outputFile.Close()
            src = outputFile.Name()
            defer os.RemoveAll(src)
        }

        log.Debug().Msgf("Parameter Config: %+v", config)

        switch config.Backend {
        case "stablediffusion":
            config.Backend = model.StableDiffusionGGMLBackend
        case "":
            config.Backend = model.StableDiffusionGGMLBackend
        }

        width := input.Width
        height := input.Height

        if width == 0 {
            width = 512
        }
        if height == 0 {
            height = 512
        }

        b64JSON := input.ResponseFormat == "b64_json"

        tempDir := ""
        if !b64JSON {
            tempDir = filepath.Join(appConfig.GeneratedContentDir, "videos")
        }
        // Create a temporary file
        outputFile, err := os.CreateTemp(tempDir, "b64")
        if err != nil {
            return err
        }
        outputFile.Close()

        // TODO: use mime type to determine the extension
        output := outputFile.Name() + ".mp4"

        // Rename the temporary file
        err = os.Rename(outputFile.Name(), output)
        if err != nil {
            return err
        }

        baseURL := c.BaseURL()

        fn, err := backend.VideoGeneration(height, width, input.Prompt, src, input.EndImage, output, ml, *config, appConfig)
        if err != nil {
            return err
        }
        if err := fn(); err != nil {
            return err
        }

        item := &schema.Item{}

        if b64JSON {
            defer os.RemoveAll(output)
            data, err := os.ReadFile(output)
            if err != nil {
                return err
            }
            item.B64JSON = base64.StdEncoding.EncodeToString(data)
        } else {
            base := filepath.Base(output)
            item.URL = baseURL + "/generated-videos/" + base
        }

        id := uuid.New().String()
        created := int(time.Now().Unix())
        resp := &schema.OpenAIResponse{
            ID:      id,
            Created: created,
            Data:    []schema.Item{*item},
        }

        jsonResult, _ := json.Marshal(resp)
        log.Debug().Msgf("Response: %s", jsonResult)

        // Return the prediction in the response body
        return c.JSON(resp)
    }
}
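For reference, a minimal client-side sketch of how the new endpoint could be exercised once the /video route below is registered. The server address and model name are placeholders, and the request shape follows the json tags on schema.VideoRequest:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Request body fields mirror schema.VideoRequest json tags.
	payload := map[string]any{
		"model":           "stablediffusion-video", // placeholder model name
		"prompt":          "A cute baby sea otter swimming",
		"width":           512,
		"height":          512,
		"response_format": "b64_json", // omit to receive a /generated-videos URL instead
	}
	body, err := json.Marshal(payload)
	if err != nil {
		panic(err)
	}

	resp, err := http.Post("http://localhost:8080/video", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The response mirrors schema.OpenAIResponse: data[0].b64_json or data[0].url.
	var out map[string]any
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out["data"])
}
```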


@@ -72,7 +72,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
             log.Error().Msg("Image Endpoint - Invalid Input")
             return fiber.ErrBadRequest
         }
 
         config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
         if !ok || config == nil {
             log.Error().Msg("Image Endpoint - Invalid Config")
@@ -108,7 +108,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
         }
 
         // Create a temporary file
-        outputFile, err := os.CreateTemp(appConfig.ImageDir, "b64")
+        outputFile, err := os.CreateTemp(appConfig.GeneratedContentDir, "b64")
         if err != nil {
             return err
         }
@@ -184,7 +184,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
         tempDir := ""
         if !b64JSON {
-            tempDir = appConfig.ImageDir
+            tempDir = filepath.Join(appConfig.GeneratedContentDir, "images")
         }
         // Create a temporary file
         outputFile, err := os.CreateTemp(tempDir, "b64")
@@ -192,6 +192,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
             return err
         }
         outputFile.Close()
+
         output := outputFile.Name() + ".png"
         // Rename the temporary file
         err = os.Rename(outputFile.Name(), output)


@@ -59,6 +59,11 @@ func RegisterLocalAIRoutes(router *fiber.App,
         router.Get("/metrics", localai.LocalAIMetricsEndpoint())
     }
 
+    router.Post("/video",
+        requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_VIDEO)),
+        requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.VideoRequest) }),
+        localai.VideoEndpoint(cl, ml, appConfig))
+
     // Backend Statistics Module
     // TODO: Should these use standard middlewares? Refactor later, they are extremely simple.
     backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now


@@ -24,6 +24,20 @@ type GalleryResponse struct {
     StatusURL string `json:"status"`
 }
 
+type VideoRequest struct {
+    BasicModelRequest
+    Prompt         string  `json:"prompt" yaml:"prompt"`
+    StartImage     string  `json:"start_image" yaml:"start_image"`
+    EndImage       string  `json:"end_image" yaml:"end_image"`
+    Width          int32   `json:"width" yaml:"width"`
+    Height         int32   `json:"height" yaml:"height"`
+    NumFrames      int32   `json:"num_frames" yaml:"num_frames"`
+    FPS            int32   `json:"fps" yaml:"fps"`
+    Seed           int32   `json:"seed" yaml:"seed"`
+    CFGScale       float32 `json:"cfg_scale" yaml:"cfg_scale"`
+    ResponseFormat string  `json:"response_format" yaml:"response_format"`
+}
+
 // @Description TTS request body
 type TTSRequest struct {
     BasicModelRequest


@@ -481,8 +481,7 @@ In the help text below, BASEPATH is the location that local-ai is being executed
 |-----------|---------|-------------|----------------------|
 | --models-path | BASEPATH/models | Path containing models used for inferencing | $LOCALAI_MODELS_PATH |
 | --backend-assets-path |/tmp/localai/backend_data | Path used to extract libraries that are required by some of the backends in runtime | $LOCALAI_BACKEND_ASSETS_PATH |
-| --image-path | /tmp/generated/images | Location for images generated by backends (e.g. stablediffusion) | $LOCALAI_IMAGE_PATH |
-| --audio-path | /tmp/generated/audio | Location for audio generated by backends (e.g. piper) | $LOCALAI_AUDIO_PATH |
+| --generated-content-path | /tmp/generated/content | Location for assets generated by backends (e.g. stablediffusion) | $LOCALAI_GENERATED_CONTENT_PATH |
 | --upload-path | /tmp/localai/upload | Path to store uploads from files api | $LOCALAI_UPLOAD_PATH |
 | --config-path | /tmp/localai/config | | $LOCALAI_CONFIG_PATH |
 | --localai-config-dir | BASEPATH/configuration | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | $LOCALAI_CONFIG_DIR |


@@ -278,3 +278,36 @@ docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=
 ```
 Note also that sycl does have a known issue to hang with `mmap: true`. You have to disable it in the model configuration if explicitly enabled.
+
+## Vulkan acceleration
+
+### Requirements
+
+If using nvidia, follow the steps in the [CUDA](#cudanvidia-acceleration) section to configure your docker runtime to allow access to the GPU.
+
+### Container images
+
+To use Vulkan, use the images with the `vulkan` tag, for example `{{< version >}}-vulkan-ffmpeg-core`.
+
+#### Example
+
+To run LocalAI with Docker and Vulkan, you can use the following command as an example:
+
+```bash
+docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/build/models localai/localai:latest-vulkan-ffmpeg-core
+```
+
+### Notes
+
+In addition to the commands to run LocalAI normally, you need to specify additional flags to pass the GPU hardware to the container.
+
+These flags are the same as the sections above, depending on the hardware, for [nvidia](#cudanvidia-acceleration), [AMD](#rocmamd-acceleration) or [Intel](#intel-acceleration-sycl).
+
+If you have mixed hardware, you can pass flags for multiple GPUs, for example:
+
+```bash
+docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/build/models \
+--gpus=all \ # nvidia passthrough
+--device /dev/dri --device /dev/kfd \ # AMD/Intel passthrough
+localai/localai:latest-vulkan-ffmpeg-core
+```


@@ -90,12 +90,16 @@
     You can find the half-precision version here.
   overrides:
+    mmproj: mmproj-google_gemma-3-12b-it-qat-f16.gguf
     parameters:
       model: google_gemma-3-12b-it-qat-Q4_0.gguf
   files:
     - filename: google_gemma-3-12b-it-qat-Q4_0.gguf
       sha256: 2ad4c9ce431a2d5b80af37983828c2cfb8f4909792ca5075e0370e3a71ca013d
       uri: huggingface://bartowski/google_gemma-3-12b-it-qat-GGUF/google_gemma-3-12b-it-qat-Q4_0.gguf
+    - filename: mmproj-google_gemma-3-12b-it-qat-f16.gguf
+      sha256: 30c02d056410848227001830866e0a269fcc28aaf8ca971bded494003de9f5a5
+      uri: huggingface://bartowski/google_gemma-3-12b-it-qat-GGUF/mmproj-google_gemma-3-12b-it-qat-f16.gguf
 - !!merge <<: *gemma3
   name: "gemma-3-4b-it-qat"
   urls:
@@ -108,12 +112,16 @@
     You can find the half-precision version here.
   overrides:
+    mmproj: mmproj-google_gemma-3-4b-it-qat-f16.gguf
     parameters:
       model: google_gemma-3-4b-it-qat-Q4_0.gguf
   files:
     - filename: google_gemma-3-4b-it-qat-Q4_0.gguf
       sha256: 0231e2cba887f4c7834c39b34251e26b2eebbb71dfac0f7e6e2b2c2531c1a583
       uri: huggingface://bartowski/google_gemma-3-4b-it-qat-GGUF/google_gemma-3-4b-it-qat-Q4_0.gguf
+    - filename: mmproj-google_gemma-3-4b-it-qat-f16.gguf
+      sha256: 8c0fb064b019a6972856aaae2c7e4792858af3ca4561be2dbf649123ba6c40cb
+      uri: huggingface://bartowski/google_gemma-3-4b-it-qat-GGUF/mmproj-google_gemma-3-4b-it-qat-f16.gguf
 - !!merge <<: *gemma3
   name: "gemma-3-27b-it-qat"
   urls:
@@ -126,12 +134,16 @@
     You can find the half-precision version here.
   overrides:
+    mmproj: mmproj-google_gemma-3-27b-it-qat-f16.gguf
     parameters:
       model: google_gemma-3-27b-it-qat-Q4_0.gguf
   files:
-    - filename: gemma-3-27b-it-q4_0.gguf
+    - filename: google_gemma-3-27b-it-qat-Q4_0.gguf
       sha256: 4f1e32db877a9339df2d6529c1635570425cbe81f0aa3f7dd5d1452f2e632b42
       uri: huggingface://bartowski/google_gemma-3-27b-it-qat-GGUF/google_gemma-3-27b-it-qat-Q4_0.gguf
+    - filename: mmproj-google_gemma-3-27b-it-qat-f16.gguf
+      sha256: 54cb61c842fe49ac3c89bc1a614a2778163eb49f3dec2b90ff688b4c0392cb48
+      uri: huggingface://bartowski/google_gemma-3-27b-it-qat-GGUF/mmproj-google_gemma-3-27b-it-qat-f16.gguf
 - !!merge <<: *gemma3
   name: "qgallouedec_gemma-3-27b-it-codeforces-sft"
   urls:
@@ -1661,6 +1673,45 @@
     - filename: Llama_3.3_70b_DarkHorse.i1-Q4_K_M.gguf
       sha256: 413a0b9203326ea78fdbdcfd89a3e0475a18f0f73fee3a6bfe1327e7b48942e2
       uri: huggingface://mradermacher/Llama_3.3_70b_DarkHorse-i1-GGUF/Llama_3.3_70b_DarkHorse.i1-Q4_K_M.gguf
+- !!merge <<: *llama33
+  name: "l3.3-geneticlemonade-unleashed-v2-70b"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/65b19c6c638328850e12d38c/0GTX4-erpPflLOkfH5sU5.png
+  urls:
+    - https://huggingface.co/zerofata/L3.3-GeneticLemonade-Unleashed-v2-70B
+    - https://huggingface.co/mradermacher/L3.3-GeneticLemonade-Unleashed-v2-70B-GGUF
+  description: |
+    An experimental release.
+    zerofata/GeneticLemonade-Unleashed qlora trained on a test dataset. Performance is improved from the original in my testing, but there are possibly (likely?) areas where the model will underperform which I am looking for feedback on.
+    This is a creative model intended to excel at character driven RP / ERP. It has not been tested or trained on adventure stories or any large amounts of creative writing.
+  overrides:
+    parameters:
+      model: L3.3-GeneticLemonade-Unleashed-v2-70B.Q4_K_M.gguf
+  files:
+    - filename: L3.3-GeneticLemonade-Unleashed-v2-70B.Q4_K_M.gguf
+      sha256: 347f0b7cea9926537643dafbe442d830734399bb6e6ff6c5bc0f69e583444548
+      uri: huggingface://mradermacher/L3.3-GeneticLemonade-Unleashed-v2-70B-GGUF/L3.3-GeneticLemonade-Unleashed-v2-70B.Q4_K_M.gguf
+- !!merge <<: *llama33
+  name: "l3.3-genetic-lemonade-sunset-70b"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/65b19c6c638328850e12d38c/txglu74hAoRrQw91rESrD.png
+  urls:
+    - https://huggingface.co/zerofata/L3.3-Genetic-Lemonade-Sunset-70B
+    - https://huggingface.co/mradermacher/L3.3-Genetic-Lemonade-Sunset-70B-GGUF
+  description: |
+    Inspired to learn how to merge by the Nevoria series from SteelSkull.
+    I wasn't planning to release any more models in this series, but I wasn't fully satisfied with Unleashed or the Final version. I happened upon the below when testing merges and found myself coming back to it, so decided to publish.
+    Model Comparison
+    Designed for RP and creative writing, all three models are focused around striking a balance between writing style, creativity and intelligence.
+  overrides:
+    parameters:
+      model: L3.3-Genetic-Lemonade-Sunset-70B.Q4_K_M.gguf
+  files:
+    - filename: L3.3-Genetic-Lemonade-Sunset-70B.Q4_K_M.gguf
+      sha256: 743c11180c0c9168c0fe31a97f9d2efe0dd749c2797d749821fcb1d6932c19f7
+      uri: huggingface://mradermacher/L3.3-Genetic-Lemonade-Sunset-70B-GGUF/L3.3-Genetic-Lemonade-Sunset-70B.Q4_K_M.gguf
 - &rwkv
   url: "github:mudler/LocalAI/gallery/rwkv.yaml@master"
   name: "rwkv-6-world-7b"


@@ -39,6 +39,7 @@ type Backend interface {
     LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error)
     PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error
     GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error)
+    GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest, opts ...grpc.CallOption) (*pb.Result, error)
     TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error)
     SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error)
     AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error)


@@ -53,6 +53,10 @@ func (llm *Base) GenerateImage(*pb.GenerateImageRequest) error {
     return fmt.Errorf("unimplemented")
 }
 
+func (llm *Base) GenerateVideo(*pb.GenerateVideoRequest) error {
+    return fmt.Errorf("unimplemented")
+}
+
 func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (pb.TranscriptResult, error) {
     return pb.TranscriptResult{}, fmt.Errorf("unimplemented")
 }


@@ -215,6 +215,28 @@ func (c *Client) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest,
     return client.GenerateImage(ctx, in, opts...)
 }
 
+func (c *Client) GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest, opts ...grpc.CallOption) (*pb.Result, error) {
+    if !c.parallel {
+        c.opMutex.Lock()
+        defer c.opMutex.Unlock()
+    }
+    c.setBusy(true)
+    defer c.setBusy(false)
+    c.wdMark()
+    defer c.wdUnMark()
+    conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
+        grpc.WithDefaultCallOptions(
+            grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
+            grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
+        ))
+    if err != nil {
+        return nil, err
+    }
+    defer conn.Close()
+    client := pb.NewBackendClient(conn)
+    return client.GenerateVideo(ctx, in, opts...)
+}
+
 func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) {
     if !c.parallel {
         c.opMutex.Lock()


@@ -47,6 +47,10 @@ func (e *embedBackend) GenerateImage(ctx context.Context, in *pb.GenerateImageRe
     return e.s.GenerateImage(ctx, in)
 }
 
+func (e *embedBackend) GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest, opts ...grpc.CallOption) (*pb.Result, error) {
+    return e.s.GenerateVideo(ctx, in)
+}
+
 func (e *embedBackend) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) {
     return e.s.TTS(ctx, in)
 }


@@ -14,6 +14,7 @@ type LLM interface {
     Load(*pb.ModelOptions) error
     Embeddings(*pb.PredictOptions) ([]float32, error)
     GenerateImage(*pb.GenerateImageRequest) error
+    GenerateVideo(*pb.GenerateVideoRequest) error
     AudioTranscription(*pb.TranscriptRequest) (pb.TranscriptResult, error)
     TTS(*pb.TTSRequest) error
     SoundGeneration(*pb.SoundGenerationRequest) error


@@ -75,6 +75,18 @@ func (s *server) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest)
     return &pb.Result{Message: "Image generated", Success: true}, nil
 }
 
+func (s *server) GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest) (*pb.Result, error) {
+    if s.llm.Locking() {
+        s.llm.Lock()
+        defer s.llm.Unlock()
+    }
+    err := s.llm.GenerateVideo(in)
+    if err != nil {
+        return &pb.Result{Message: fmt.Sprintf("Error generating video: %s", err.Error()), Success: false}, err
+    }
+    return &pb.Result{Message: "Video generated", Success: true}, nil
+}
+
 func (s *server) TTS(ctx context.Context, in *pb.TTSRequest) (*pb.Result, error) {
     if s.llm.Locking() {
         s.llm.Lock()


@@ -812,6 +812,30 @@ const docTemplate = `{
                     }
                 }
             }
+        },
+        "/video": {
+            "post": {
+                "summary": "Creates a video given a prompt.",
+                "parameters": [
+                    {
+                        "description": "query params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIResponse"
+                        }
+                    }
+                }
+            }
         }
     },
     "definitions": {


@@ -805,6 +805,30 @@
                     }
                 }
             }
+        },
+        "/video": {
+            "post": {
+                "summary": "Creates a video given a prompt.",
+                "parameters": [
+                    {
+                        "description": "query params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIResponse"
+                        }
+                    }
+                }
+            }
         }
     },
     "definitions": {


@@ -1248,6 +1248,21 @@ paths:
       schema:
         $ref: '#/definitions/proto.VADResponse'
       summary: Detect voice fragments in an audio stream
+  /video:
+    post:
+      parameters:
+      - description: query params
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/schema.OpenAIRequest'
+      responses:
+        "200":
+          description: Response
+          schema:
+            $ref: '#/definitions/schema.OpenAIResponse'
+      summary: Creates a video given a prompt.
 securityDefinitions:
   BearerAuth:
     in: header