Merge branch 'master' into fix-selinux-nvidia-smi

Alessandro Pirastru 2025-04-27 14:27:03 +02:00 committed by GitHub
commit 40883c410d
30 changed files with 560 additions and 188 deletions


@@ -45,7 +45,7 @@ jobs:
           - build-type: 'hipblas'
             platforms: 'linux/amd64'
             tag-latest: 'auto'
-            tag-suffix: '-hipblas-ffmpeg'
+            tag-suffix: '-hipblas'
             ffmpeg: 'true'
             image-type: 'extras'
             aio: "-aio-gpu-hipblas"
@@ -58,17 +58,7 @@ jobs:
           - build-type: 'hipblas'
             platforms: 'linux/amd64'
             tag-latest: 'false'
-            tag-suffix: '-hipblas'
-            ffmpeg: 'false'
-            image-type: 'extras'
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-hipblas-ffmpeg-core'
+            tag-suffix: '-hipblas-core'
             ffmpeg: 'true'
             image-type: 'core'
             base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -76,16 +66,6 @@ jobs:
             runs-on: 'arc-runner-set'
             makeflags: "--jobs=3 --output-sync=target"
             latest-image: 'latest-gpu-hipblas-core'
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-hipblas-core'
-            ffmpeg: 'false'
-            image-type: 'core'
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
   self-hosted-jobs:
     uses: ./.github/workflows/image_build.yml
     with:
@@ -115,54 +95,21 @@ jobs:
       max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
       matrix:
         include:
-          # Extra images
           - build-type: ''
-            #platforms: 'linux/amd64,linux/arm64'
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: ''
-            ffmpeg: ''
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: ''
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-ffmpeg'
             ffmpeg: 'true'
             image-type: 'extras'
             runs-on: 'arc-runner-set'
             base-image: "ubuntu:22.04"
             makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-cublas-cuda11'
-            ffmpeg: ''
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-cublas-cuda12'
-            ffmpeg: ''
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
             cuda-minor-version: "7"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
-            tag-suffix: '-cublas-cuda11-ffmpeg'
+            tag-suffix: '-cublas-cuda11'
             ffmpeg: 'true'
             image-type: 'extras'
             runs-on: 'arc-runner-set'
@@ -176,7 +123,7 @@ jobs:
             cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
-            tag-suffix: '-cublas-cuda12-ffmpeg'
+            tag-suffix: '-cublas-cuda12'
             ffmpeg: 'true'
             image-type: 'extras'
             runs-on: 'arc-runner-set'
@@ -185,22 +132,12 @@ jobs:
             latest-image: 'latest-gpu-nvidia-cuda-12'
             latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
             makeflags: "--jobs=3 --output-sync=target"
-          - build-type: ''
-            #platforms: 'linux/amd64,linux/arm64'
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: ''
-            ffmpeg: ''
-            image-type: 'extras'
-            base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'sycl_f16'
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
             grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f16-ffmpeg'
+            tag-suffix: '-sycl-f16'
             ffmpeg: 'true'
             image-type: 'extras'
             runs-on: 'arc-runner-set'
@@ -213,7 +150,7 @@ jobs:
             tag-latest: 'auto'
             base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
             grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f32-ffmpeg'
+            tag-suffix: '-sycl-f32'
             ffmpeg: 'true'
             image-type: 'extras'
             runs-on: 'arc-runner-set'
@@ -228,26 +165,6 @@ jobs:
             base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
             grpc-base-image: "ubuntu:22.04"
             tag-suffix: '-sycl-f16-core'
-            ffmpeg: 'false'
-            image-type: 'core'
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'sycl_f32'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f32-core'
-            ffmpeg: 'false'
-            image-type: 'core'
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'sycl_f16'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f16-ffmpeg-core'
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'arc-runner-set'
@@ -258,7 +175,7 @@ jobs:
             tag-latest: 'false'
             base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
             grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f32-ffmpeg-core'
+            tag-suffix: '-sycl-f32-core'
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'arc-runner-set'
@@ -296,7 +213,7 @@ jobs:
           - build-type: ''
             platforms: 'linux/amd64,linux/arm64'
             tag-latest: 'auto'
-            tag-suffix: '-ffmpeg-core'
+            tag-suffix: '-core'
             ffmpeg: 'true'
             image-type: 'core'
             base-image: "ubuntu:22.04"
@@ -312,30 +229,6 @@ jobs:
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda11-core'
-            ffmpeg: ''
-            image-type: 'core'
-            base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=4 --output-sync=target"
-            skip-drivers: 'false'
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-cublas-cuda12-core'
-            ffmpeg: ''
-            image-type: 'core'
-            base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=4 --output-sync=target"
-            skip-drivers: 'false'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-cublas-cuda11-ffmpeg-core'
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'arc-runner-set'
@@ -348,7 +241,7 @@ jobs:
             cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'false'
-            tag-suffix: '-cublas-cuda12-ffmpeg-core'
+            tag-suffix: '-cublas-cuda12-core'
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'arc-runner-set'
@@ -359,7 +252,7 @@ jobs:
           - build-type: 'vulkan'
             platforms: 'linux/amd64'
             tag-latest: 'false'
-            tag-suffix: '-vulkan-ffmpeg-core'
+            tag-suffix: '-vulkan-core'
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'arc-runner-set'


@@ -6,7 +6,7 @@ BINARY_NAME=local-ai
 DETECT_LIBS?=true
 
 # llama.cpp versions
-CPPLLAMA_VERSION?=295354ea6848a77bdee204ee1c971d9b92ffcca9
+CPPLLAMA_VERSION?=77d5e9a76a7b4a8a7c5bf9cf6ebef91860123cba
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp


@@ -14,6 +14,7 @@ service Backend {
   rpc PredictStream(PredictOptions) returns (stream Reply) {}
   rpc Embedding(PredictOptions) returns (EmbeddingResult) {}
   rpc GenerateImage(GenerateImageRequest) returns (Result) {}
+  rpc GenerateVideo(GenerateVideoRequest) returns (Result) {}
   rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
   rpc TTS(TTSRequest) returns (Result) {}
   rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
@@ -301,6 +302,19 @@ message GenerateImageRequest {
   int32 CLIPSkip = 11;
 }
 
+message GenerateVideoRequest {
+  string prompt = 1;
+  string start_image = 2; // Path or base64 encoded image for the start frame
+  string end_image = 3;   // Path or base64 encoded image for the end frame
+  int32 width = 4;
+  int32 height = 5;
+  int32 num_frames = 6;   // Number of frames to generate
+  int32 fps = 7;          // Frames per second
+  int32 seed = 8;
+  float cfg_scale = 9;    // Classifier-free guidance scale
+  string dst = 10;        // Output path for the generated video
+}
+
 message TTSRequest {
   string text = 1;
   string model = 2;
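As a quick orientation for the new RPC, here is a sketch of a fully populated request as a Go client might build it. The field names assume the usual protoc-gen-go mapping of the snake_case proto fields above, and all values are placeholders:

```go
package videoexample

import "github.com/mudler/LocalAI/pkg/grpc/proto"

// buildVideoRequest illustrates every field of GenerateVideoRequest;
// the values are made up for the example.
func buildVideoRequest() *proto.GenerateVideoRequest {
	return &proto.GenerateVideoRequest{
		Prompt:     "a timelapse of clouds over mountains",
		StartImage: "/tmp/start.png", // path or base64-encoded image
		Width:      512,
		Height:     512,
		NumFrames:  24,
		Fps:        8,
		Seed:       42,
		CfgScale:   7.0,
		Dst:        "/tmp/out.mp4", // where the backend should write the result
	}
}
```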


@@ -43,18 +43,12 @@ func New(opts ...config.AppOption) (*Application, error) {
     if err != nil {
         return nil, fmt.Errorf("unable to create ModelPath: %q", err)
     }
-    if options.ImageDir != "" {
-        err := os.MkdirAll(options.ImageDir, 0750)
+    if options.GeneratedContentDir != "" {
+        err := os.MkdirAll(options.GeneratedContentDir, 0750)
         if err != nil {
             return nil, fmt.Errorf("unable to create ImageDir: %q", err)
         }
     }
-    if options.AudioDir != "" {
-        err := os.MkdirAll(options.AudioDir, 0750)
-        if err != nil {
-            return nil, fmt.Errorf("unable to create AudioDir: %q", err)
-        }
-    }
     if options.UploadDir != "" {
         err := os.MkdirAll(options.UploadDir, 0750)
         if err != nil {


@@ -35,12 +35,17 @@ func SoundGeneration(
         return "", nil, fmt.Errorf("could not load sound generation model")
     }
 
-    if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
+    if err := os.MkdirAll(appConfig.GeneratedContentDir, 0750); err != nil {
         return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
     }
 
-    fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "sound_generation", ".wav")
-    filePath := filepath.Join(appConfig.AudioDir, fileName)
+    audioDir := filepath.Join(appConfig.GeneratedContentDir, "audio")
+    if err := os.MkdirAll(audioDir, 0750); err != nil {
+        return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
+    }
+
+    fileName := utils.GenerateUniqueFileName(audioDir, "sound_generation", ".wav")
+    filePath := filepath.Join(audioDir, fileName)
 
     res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{
         Text: text,


@@ -32,12 +32,13 @@ func ModelTTS(
         return "", nil, fmt.Errorf("could not load tts model %q", backendConfig.Model)
     }
 
-    if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
+    audioDir := filepath.Join(appConfig.GeneratedContentDir, "audio")
+    if err := os.MkdirAll(audioDir, 0750); err != nil {
         return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
     }
 
-    fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
-    filePath := filepath.Join(appConfig.AudioDir, fileName)
+    fileName := utils.GenerateUniqueFileName(audioDir, "tts", ".wav")
+    filePath := filepath.Join(audioDir, fileName)
 
     // We join the model name to the model path here. This seems to only be done for TTS and is HIGHLY suspect.
     // This should be addressed in a follow up PR soon.

core/backend/video.go (new file, +36 lines)

@@ -0,0 +1,36 @@
package backend

import (
    "github.com/mudler/LocalAI/core/config"
    "github.com/mudler/LocalAI/pkg/grpc/proto"
    model "github.com/mudler/LocalAI/pkg/model"
)

func VideoGeneration(height, width int32, prompt, startImage, endImage, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {

    opts := ModelOptions(backendConfig, appConfig)
    inferenceModel, err := loader.Load(
        opts...,
    )
    if err != nil {
        return nil, err
    }
    defer loader.Close()

    fn := func() error {
        _, err := inferenceModel.GenerateVideo(
            appConfig.Context,
            &proto.GenerateVideoRequest{
                Height:     height,
                Width:      width,
                Prompt:     prompt,
                StartImage: startImage,
                EndImage:   endImage,
                Dst:        dst,
            })
        return err
    }

    return fn, nil
}
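For context, a minimal sketch of how a caller might drive this helper (the HTTP endpoint added later in this commit follows the same pattern). The model loader, backend config and application config are assumed to be already initialized, and the prompt and output path are placeholders:

```go
package videoexample

import (
	"github.com/mudler/LocalAI/core/backend"
	"github.com/mudler/LocalAI/core/config"
	model "github.com/mudler/LocalAI/pkg/model"
)

// generateClip asks the backend for a short clip and blocks until it is written.
func generateClip(ml *model.ModelLoader, cfg config.BackendConfig, appConfig *config.ApplicationConfig) error {
	fn, err := backend.VideoGeneration(
		512, 512,                   // height, width
		"a sailing boat at sunset", // prompt (placeholder)
		"", "",                     // optional start/end images (path or base64)
		"/tmp/out.mp4",             // destination file (placeholder)
		ml, cfg, appConfig,
	)
	if err != nil {
		return err
	}
	// The returned closure performs the actual gRPC call when invoked.
	return fn()
}
```

Returning a closure instead of generating immediately lets the caller prepare temporary files and decide when the gRPC call actually runs.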


@@ -21,8 +21,7 @@ type RunCMD struct {
     ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
     BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
-    ImagePath string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"`
-    AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. piper)" group:"storage"`
+    GeneratedContentPath string `env:"LOCALAI_GENERATED_CONTENT_PATH,GENERATED_CONTENT_PATH" type:"path" default:"/tmp/generated/content" help:"Location for generated content (e.g. images, audio, videos)" group:"storage"`
     UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
     ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"`
     LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"`
@@ -81,8 +80,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
         config.WithModelPath(r.ModelsPath),
         config.WithContextSize(r.ContextSize),
         config.WithDebug(zerolog.GlobalLevel() <= zerolog.DebugLevel),
-        config.WithImageDir(r.ImagePath),
-        config.WithAudioDir(r.AudioPath),
+        config.WithGeneratedContentDir(r.GeneratedContentPath),
         config.WithUploadDir(r.UploadPath),
         config.WithConfigsDir(r.ConfigPath),
         config.WithDynamicConfigDir(r.LocalaiConfigDir),


@@ -70,7 +70,7 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
     opts := &config.ApplicationConfig{
         ModelPath:            t.ModelsPath,
         Context:              context.Background(),
-        AudioDir:             outputDir,
+        GeneratedContentDir:  outputDir,
         AssetsDestination:    t.BackendAssetsPath,
         ExternalGRPCBackends: externalBackends,
     }


@@ -36,10 +36,10 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error {
     text := strings.Join(t.Text, " ")
 
     opts := &config.ApplicationConfig{
         ModelPath:           t.ModelsPath,
         Context:             context.Background(),
-        AudioDir:            outputDir,
+        GeneratedContentDir: outputDir,
         AssetsDestination:   t.BackendAssetsPath,
     }
 
     ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)


@@ -19,20 +19,21 @@ type ApplicationConfig struct {
     UploadLimitMB, Threads, ContextSize int
     F16 bool
     Debug bool
-    ImageDir string
-    AudioDir string
-    UploadDir string
-    ConfigsDir string
-    DynamicConfigsDir string
-    DynamicConfigsDirPollInterval time.Duration
-    CORS bool
-    CSRF bool
-    PreloadJSONModels string
-    PreloadModelsFromPath string
-    CORSAllowOrigins string
-    ApiKeys []string
-    P2PToken string
-    P2PNetworkID string
+    GeneratedContentDir string
+
+    ConfigsDir string
+    UploadDir string
+
+    DynamicConfigsDir string
+    DynamicConfigsDirPollInterval time.Duration
+    CORS bool
+    CSRF bool
+    PreloadJSONModels string
+    PreloadModelsFromPath string
+    CORSAllowOrigins string
+    ApiKeys []string
+    P2PToken string
+    P2PNetworkID string
 
     DisableWebUI bool
     EnforcePredownloadScans bool
@@ -279,15 +280,9 @@ func WithDebug(debug bool) AppOption {
     }
 }
 
-func WithAudioDir(audioDir string) AppOption {
+func WithGeneratedContentDir(generatedContentDir string) AppOption {
     return func(o *ApplicationConfig) {
-        o.AudioDir = audioDir
-    }
-}
-
-func WithImageDir(imageDir string) AppOption {
-    return func(o *ApplicationConfig) {
-        o.ImageDir = imageDir
+        o.GeneratedContentDir = generatedContentDir
     }
 }


@@ -436,18 +436,19 @@ func (c *BackendConfig) HasTemplate() bool {
 type BackendConfigUsecases int
 
 const (
-    FLAG_ANY              BackendConfigUsecases = 0b00000000000
-    FLAG_CHAT             BackendConfigUsecases = 0b00000000001
-    FLAG_COMPLETION       BackendConfigUsecases = 0b00000000010
-    FLAG_EDIT             BackendConfigUsecases = 0b00000000100
-    FLAG_EMBEDDINGS       BackendConfigUsecases = 0b00000001000
-    FLAG_RERANK           BackendConfigUsecases = 0b00000010000
-    FLAG_IMAGE            BackendConfigUsecases = 0b00000100000
-    FLAG_TRANSCRIPT       BackendConfigUsecases = 0b00001000000
-    FLAG_TTS              BackendConfigUsecases = 0b00010000000
-    FLAG_SOUND_GENERATION BackendConfigUsecases = 0b00100000000
-    FLAG_TOKENIZE         BackendConfigUsecases = 0b01000000000
-    FLAG_VAD              BackendConfigUsecases = 0b10000000000
+    FLAG_ANY              BackendConfigUsecases = 0b000000000000
+    FLAG_CHAT             BackendConfigUsecases = 0b000000000001
+    FLAG_COMPLETION       BackendConfigUsecases = 0b000000000010
+    FLAG_EDIT             BackendConfigUsecases = 0b000000000100
+    FLAG_EMBEDDINGS       BackendConfigUsecases = 0b000000001000
+    FLAG_RERANK           BackendConfigUsecases = 0b000000010000
+    FLAG_IMAGE            BackendConfigUsecases = 0b000000100000
+    FLAG_TRANSCRIPT       BackendConfigUsecases = 0b000001000000
+    FLAG_TTS              BackendConfigUsecases = 0b000010000000
+    FLAG_SOUND_GENERATION BackendConfigUsecases = 0b000100000000
+    FLAG_TOKENIZE         BackendConfigUsecases = 0b001000000000
+    FLAG_VAD              BackendConfigUsecases = 0b010000000000
+    FLAG_VIDEO            BackendConfigUsecases = 0b100000000000
 
     // Common Subsets
     FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
@@ -468,6 +469,7 @@ func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
         "FLAG_TOKENIZE": FLAG_TOKENIZE,
         "FLAG_VAD":      FLAG_VAD,
         "FLAG_LLM":      FLAG_LLM,
+        "FLAG_VIDEO":    FLAG_VIDEO,
     }
 }
@@ -532,6 +534,17 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
             return false
         }
     }
+    if (u & FLAG_VIDEO) == FLAG_VIDEO {
+        videoBackends := []string{"diffusers", "stablediffusion"}
+        if !slices.Contains(videoBackends, c.Backend) {
+            return false
+        }
+        if c.Backend == "diffusers" && c.Diffusers.PipelineType == "" {
+            return false
+        }
+    }
     if (u & FLAG_RERANK) == FLAG_RERANK {
         if c.Backend != "rerankers" {
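Since each constant above occupies a single bit, usecase sets compose with bitwise OR and are tested with the same mask-and-compare that `GuessUsecases` uses. A small self-contained illustration, with the constant values copied from the block above:

```go
package main

import "fmt"

// Values copied from the const block above, for illustration only.
const (
	FLAG_CHAT  = 0b000000000001
	FLAG_IMAGE = 0b000000100000
	FLAG_VIDEO = 0b100000000000
)

func main() {
	// A backend advertising both image and video generation:
	usecases := FLAG_IMAGE | FLAG_VIDEO

	// The per-flag containment test used by GuessUsecases:
	fmt.Println(usecases&FLAG_VIDEO == FLAG_VIDEO) // true
	fmt.Println(usecases&FLAG_CHAT == FLAG_CHAT)   // false
}
```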


@@ -5,6 +5,8 @@ import (
     "errors"
     "fmt"
     "net/http"
+    "os"
+    "path/filepath"
 
     "github.com/dave-gray101/v2keyauth"
     "github.com/mudler/LocalAI/pkg/utils"
@@ -153,12 +155,19 @@ func API(application *application.Application) (*fiber.App, error) {
         Browse: true,
     }))
 
-    if application.ApplicationConfig().ImageDir != "" {
-        router.Static("/generated-images", application.ApplicationConfig().ImageDir)
-    }
-
-    if application.ApplicationConfig().AudioDir != "" {
-        router.Static("/generated-audio", application.ApplicationConfig().AudioDir)
+    if application.ApplicationConfig().GeneratedContentDir != "" {
+        os.MkdirAll(application.ApplicationConfig().GeneratedContentDir, 0750)
+        audioPath := filepath.Join(application.ApplicationConfig().GeneratedContentDir, "audio")
+        imagePath := filepath.Join(application.ApplicationConfig().GeneratedContentDir, "images")
+        videoPath := filepath.Join(application.ApplicationConfig().GeneratedContentDir, "videos")
+
+        os.MkdirAll(audioPath, 0750)
+        os.MkdirAll(imagePath, 0750)
+        os.MkdirAll(videoPath, 0750)
+        router.Static("/generated-audio", audioPath)
+        router.Static("/generated-images", imagePath)
+        router.Static("/generated-videos", videoPath)
     }
 
     // Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration


@@ -629,8 +629,7 @@ var _ = Describe("API test", func() {
             application, err := application.New(
                 append(commonOpts,
                     config.WithContext(c),
-                    config.WithAudioDir(tmpdir),
-                    config.WithImageDir(tmpdir),
+                    config.WithGeneratedContentDir(tmpdir),
                     config.WithGalleries(galleries),
                     config.WithModelPath(modelDir),
                     config.WithBackendAssets(backendAssets),


@@ -0,0 +1,205 @@
package localai

import (
    "bufio"
    "encoding/base64"
    "encoding/json"
    "fmt"
    "io"
    "net/http"
    "os"
    "path/filepath"
    "strings"
    "time"

    "github.com/google/uuid"
    "github.com/mudler/LocalAI/core/config"
    "github.com/mudler/LocalAI/core/http/middleware"
    "github.com/mudler/LocalAI/core/schema"

    "github.com/mudler/LocalAI/core/backend"

    "github.com/gofiber/fiber/v2"
    model "github.com/mudler/LocalAI/pkg/model"
    "github.com/rs/zerolog/log"
)

func downloadFile(url string) (string, error) {
    // Get the data
    resp, err := http.Get(url)
    if err != nil {
        return "", err
    }
    defer resp.Body.Close()

    // Create the file
    out, err := os.CreateTemp("", "video")
    if err != nil {
        return "", err
    }
    defer out.Close()

    // Write the body to file
    _, err = io.Copy(out, resp.Body)
    return out.Name(), err
}

//

/*
*
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "A cute baby sea otter",
"n": 1,
"size": "512x512"
}'
*
*/
// VideoEndpoint
// @Summary Creates a video given a prompt.
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /video [post]
func VideoEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
    return func(c *fiber.Ctx) error {
        input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.VideoRequest)
        if !ok || input.Model == "" {
            log.Error().Msg("Video Endpoint - Invalid Input")
            return fiber.ErrBadRequest
        }

        config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
        if !ok || config == nil {
            log.Error().Msg("Video Endpoint - Invalid Config")
            return fiber.ErrBadRequest
        }

        src := ""
        if input.StartImage != "" {
            var fileData []byte
            var err error
            // check if input.File is an URL, if so download it and save it
            // to a temporary file
            if strings.HasPrefix(input.StartImage, "http://") || strings.HasPrefix(input.StartImage, "https://") {
                out, err := downloadFile(input.StartImage)
                if err != nil {
                    return fmt.Errorf("failed downloading file:%w", err)
                }
                defer os.RemoveAll(out)

                fileData, err = os.ReadFile(out)
                if err != nil {
                    return fmt.Errorf("failed reading file:%w", err)
                }
            } else {
                // base 64 decode the file and write it somewhere
                // that we will cleanup
                fileData, err = base64.StdEncoding.DecodeString(input.StartImage)
                if err != nil {
                    return err
                }
            }

            // Create a temporary file
            outputFile, err := os.CreateTemp(appConfig.GeneratedContentDir, "b64")
            if err != nil {
                return err
            }
            // write the base64 result
            writer := bufio.NewWriter(outputFile)
            _, err = writer.Write(fileData)
            if err != nil {
                outputFile.Close()
                return err
            }
            outputFile.Close()
            src = outputFile.Name()
            defer os.RemoveAll(src)
        }

        log.Debug().Msgf("Parameter Config: %+v", config)

        switch config.Backend {
        case "stablediffusion":
            config.Backend = model.StableDiffusionGGMLBackend
        case "":
            config.Backend = model.StableDiffusionGGMLBackend
        }

        width := input.Width
        height := input.Height

        if width == 0 {
            width = 512
        }
        if height == 0 {
            height = 512
        }

        b64JSON := input.ResponseFormat == "b64_json"

        tempDir := ""
        if !b64JSON {
            tempDir = filepath.Join(appConfig.GeneratedContentDir, "videos")
        }
        // Create a temporary file
        outputFile, err := os.CreateTemp(tempDir, "b64")
        if err != nil {
            return err
        }
        outputFile.Close()

        // TODO: use mime type to determine the extension
        output := outputFile.Name() + ".mp4"

        // Rename the temporary file
        err = os.Rename(outputFile.Name(), output)
        if err != nil {
            return err
        }

        baseURL := c.BaseURL()

        fn, err := backend.VideoGeneration(height, width, input.Prompt, src, input.EndImage, output, ml, *config, appConfig)
        if err != nil {
            return err
        }
        if err := fn(); err != nil {
            return err
        }

        item := &schema.Item{}

        if b64JSON {
            defer os.RemoveAll(output)
            data, err := os.ReadFile(output)
            if err != nil {
                return err
            }
            item.B64JSON = base64.StdEncoding.EncodeToString(data)
        } else {
            base := filepath.Base(output)
            item.URL = baseURL + "/generated-videos/" + base
        }

        id := uuid.New().String()
        created := int(time.Now().Unix())
        resp := &schema.OpenAIResponse{
            ID:      id,
            Created: created,
            Data:    []schema.Item{*item},
        }

        jsonResult, _ := json.Marshal(resp)
        log.Debug().Msgf("Response: %s", jsonResult)

        // Return the prediction in the response body
        return c.JSON(resp)
    }
}
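For reference, a minimal client-side sketch of how the new endpoint could be exercised once the /video route below is registered. The server address and model name are placeholders, and the request shape follows the json tags on schema.VideoRequest:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Request body fields mirror schema.VideoRequest json tags.
	payload := map[string]any{
		"model":           "stablediffusion-video", // placeholder model name
		"prompt":          "A cute baby sea otter swimming",
		"width":           512,
		"height":          512,
		"response_format": "b64_json", // omit to receive a /generated-videos URL instead
	}
	body, err := json.Marshal(payload)
	if err != nil {
		panic(err)
	}

	resp, err := http.Post("http://localhost:8080/video", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The response mirrors schema.OpenAIResponse: data[0].b64_json or data[0].url.
	var out map[string]any
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out["data"])
}
```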


@@ -72,7 +72,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
             log.Error().Msg("Image Endpoint - Invalid Input")
             return fiber.ErrBadRequest
         }
 
         config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
         if !ok || config == nil {
             log.Error().Msg("Image Endpoint - Invalid Config")
@@ -108,7 +108,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
         }
 
         // Create a temporary file
-        outputFile, err := os.CreateTemp(appConfig.ImageDir, "b64")
+        outputFile, err := os.CreateTemp(appConfig.GeneratedContentDir, "b64")
         if err != nil {
             return err
         }
@@ -184,7 +184,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
         tempDir := ""
         if !b64JSON {
-            tempDir = appConfig.ImageDir
+            tempDir = filepath.Join(appConfig.GeneratedContentDir, "images")
         }
         // Create a temporary file
         outputFile, err := os.CreateTemp(tempDir, "b64")
@@ -192,6 +192,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
             return err
         }
         outputFile.Close()
+
         output := outputFile.Name() + ".png"
         // Rename the temporary file
         err = os.Rename(outputFile.Name(), output)


@@ -59,6 +59,11 @@ func RegisterLocalAIRoutes(router *fiber.App,
         router.Get("/metrics", localai.LocalAIMetricsEndpoint())
     }
 
+    router.Post("/video",
+        requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_VIDEO)),
+        requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.VideoRequest) }),
+        localai.VideoEndpoint(cl, ml, appConfig))
+
     // Backend Statistics Module
     // TODO: Should these use standard middlewares? Refactor later, they are extremely simple.
     backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now


@@ -24,6 +24,20 @@ type GalleryResponse struct {
     StatusURL string `json:"status"`
 }
 
+type VideoRequest struct {
+    BasicModelRequest
+    Prompt         string  `json:"prompt" yaml:"prompt"`
+    StartImage     string  `json:"start_image" yaml:"start_image"`
+    EndImage       string  `json:"end_image" yaml:"end_image"`
+    Width          int32   `json:"width" yaml:"width"`
+    Height         int32   `json:"height" yaml:"height"`
+    NumFrames      int32   `json:"num_frames" yaml:"num_frames"`
+    FPS            int32   `json:"fps" yaml:"fps"`
+    Seed           int32   `json:"seed" yaml:"seed"`
+    CFGScale       float32 `json:"cfg_scale" yaml:"cfg_scale"`
+    ResponseFormat string  `json:"response_format" yaml:"response_format"`
+}
+
 // @Description TTS request body
 type TTSRequest struct {
     BasicModelRequest


@@ -481,8 +481,7 @@ In the help text below, BASEPATH is the location that local-ai is being executed
 |-----------|---------|-------------|----------------------|
 | --models-path | BASEPATH/models | Path containing models used for inferencing | $LOCALAI_MODELS_PATH |
 | --backend-assets-path |/tmp/localai/backend_data | Path used to extract libraries that are required by some of the backends in runtime | $LOCALAI_BACKEND_ASSETS_PATH |
-| --image-path | /tmp/generated/images | Location for images generated by backends (e.g. stablediffusion) | $LOCALAI_IMAGE_PATH |
-| --audio-path | /tmp/generated/audio | Location for audio generated by backends (e.g. piper) | $LOCALAI_AUDIO_PATH |
+| --generated-content-path | /tmp/generated/content | Location for assets generated by backends (e.g. stablediffusion) | $LOCALAI_GENERATED_CONTENT_PATH |
 | --upload-path | /tmp/localai/upload | Path to store uploads from files api | $LOCALAI_UPLOAD_PATH |
 | --config-path | /tmp/localai/config | | $LOCALAI_CONFIG_PATH |
 | --localai-config-dir | BASEPATH/configuration | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | $LOCALAI_CONFIG_DIR |


@@ -278,3 +278,36 @@ docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=
 ```
 Note also that sycl does have a known issue to hang with `mmap: true`. You have to disable it in the model configuration if explicitly enabled.
+
+## Vulkan acceleration
+
+### Requirements
+
+If using nvidia, follow the steps in the [CUDA](#cudanvidia-acceleration) section to configure your docker runtime to allow access to the GPU.
+
+### Container images
+
+To use Vulkan, use the images with the `vulkan` tag, for example `{{< version >}}-vulkan-ffmpeg-core`.
+
+#### Example
+
+To run LocalAI with Docker and Vulkan, you can use the following command as an example:
+
+```bash
+docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/build/models localai/localai:latest-vulkan-ffmpeg-core
+```
+
+### Notes
+
+In addition to the commands to run LocalAI normally, you need to specify additional flags to pass the GPU hardware to the container.
+
+These flags are the same as the sections above, depending on the hardware, for [nvidia](#cudanvidia-acceleration), [AMD](#rocmamd-acceleration) or [Intel](#intel-acceleration-sycl).
+
+If you have mixed hardware, you can pass flags for multiple GPUs, for example:
+
+```bash
+docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/build/models \
+--gpus=all \ # nvidia passthrough
+--device /dev/dri --device /dev/kfd \ # AMD/Intel passthrough
+localai/localai:latest-vulkan-ffmpeg-core
+```


@@ -90,12 +90,16 @@
     You can find the half-precision version here.
   overrides:
+    mmproj: mmproj-google_gemma-3-12b-it-qat-f16.gguf
     parameters:
       model: google_gemma-3-12b-it-qat-Q4_0.gguf
   files:
     - filename: google_gemma-3-12b-it-qat-Q4_0.gguf
       sha256: 2ad4c9ce431a2d5b80af37983828c2cfb8f4909792ca5075e0370e3a71ca013d
       uri: huggingface://bartowski/google_gemma-3-12b-it-qat-GGUF/google_gemma-3-12b-it-qat-Q4_0.gguf
+    - filename: mmproj-google_gemma-3-12b-it-qat-f16.gguf
+      sha256: 30c02d056410848227001830866e0a269fcc28aaf8ca971bded494003de9f5a5
+      uri: huggingface://bartowski/google_gemma-3-12b-it-qat-GGUF/mmproj-google_gemma-3-12b-it-qat-f16.gguf
 - !!merge <<: *gemma3
   name: "gemma-3-4b-it-qat"
   urls:
@@ -108,12 +112,16 @@
     You can find the half-precision version here.
   overrides:
+    mmproj: mmproj-google_gemma-3-4b-it-qat-f16.gguf
     parameters:
       model: google_gemma-3-4b-it-qat-Q4_0.gguf
   files:
     - filename: google_gemma-3-4b-it-qat-Q4_0.gguf
       sha256: 0231e2cba887f4c7834c39b34251e26b2eebbb71dfac0f7e6e2b2c2531c1a583
       uri: huggingface://bartowski/google_gemma-3-4b-it-qat-GGUF/google_gemma-3-4b-it-qat-Q4_0.gguf
+    - filename: mmproj-google_gemma-3-4b-it-qat-f16.gguf
+      sha256: 8c0fb064b019a6972856aaae2c7e4792858af3ca4561be2dbf649123ba6c40cb
+      uri: huggingface://bartowski/google_gemma-3-4b-it-qat-GGUF/mmproj-google_gemma-3-4b-it-qat-f16.gguf
 - !!merge <<: *gemma3
   name: "gemma-3-27b-it-qat"
   urls:
@@ -126,12 +134,16 @@
     You can find the half-precision version here.
   overrides:
+    mmproj: mmproj-google_gemma-3-27b-it-qat-f16.gguf
     parameters:
       model: google_gemma-3-27b-it-qat-Q4_0.gguf
   files:
-    - filename: gemma-3-27b-it-q4_0.gguf
+    - filename: google_gemma-3-27b-it-qat-Q4_0.gguf
       sha256: 4f1e32db877a9339df2d6529c1635570425cbe81f0aa3f7dd5d1452f2e632b42
       uri: huggingface://bartowski/google_gemma-3-27b-it-qat-GGUF/google_gemma-3-27b-it-qat-Q4_0.gguf
+    - filename: mmproj-google_gemma-3-27b-it-qat-f16.gguf
+      sha256: 54cb61c842fe49ac3c89bc1a614a2778163eb49f3dec2b90ff688b4c0392cb48
+      uri: huggingface://bartowski/google_gemma-3-27b-it-qat-GGUF/mmproj-google_gemma-3-27b-it-qat-f16.gguf
 - !!merge <<: *gemma3
   name: "qgallouedec_gemma-3-27b-it-codeforces-sft"
   urls:
@@ -1661,6 +1673,45 @@
     - filename: Llama_3.3_70b_DarkHorse.i1-Q4_K_M.gguf
       sha256: 413a0b9203326ea78fdbdcfd89a3e0475a18f0f73fee3a6bfe1327e7b48942e2
       uri: huggingface://mradermacher/Llama_3.3_70b_DarkHorse-i1-GGUF/Llama_3.3_70b_DarkHorse.i1-Q4_K_M.gguf
+- !!merge <<: *llama33
+  name: "l3.3-geneticlemonade-unleashed-v2-70b"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/65b19c6c638328850e12d38c/0GTX4-erpPflLOkfH5sU5.png
+  urls:
+    - https://huggingface.co/zerofata/L3.3-GeneticLemonade-Unleashed-v2-70B
+    - https://huggingface.co/mradermacher/L3.3-GeneticLemonade-Unleashed-v2-70B-GGUF
+  description: |
+    An experimental release.
+    zerofata/GeneticLemonade-Unleashed qlora trained on a test dataset. Performance is improved from the original in my testing, but there are possibly (likely?) areas where the model will underperform which I am looking for feedback on.
+    This is a creative model intended to excel at character driven RP / ERP. It has not been tested or trained on adventure stories or any large amounts of creative writing.
+  overrides:
+    parameters:
+      model: L3.3-GeneticLemonade-Unleashed-v2-70B.Q4_K_M.gguf
+  files:
+    - filename: L3.3-GeneticLemonade-Unleashed-v2-70B.Q4_K_M.gguf
+      sha256: 347f0b7cea9926537643dafbe442d830734399bb6e6ff6c5bc0f69e583444548
+      uri: huggingface://mradermacher/L3.3-GeneticLemonade-Unleashed-v2-70B-GGUF/L3.3-GeneticLemonade-Unleashed-v2-70B.Q4_K_M.gguf
+- !!merge <<: *llama33
+  name: "l3.3-genetic-lemonade-sunset-70b"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/65b19c6c638328850e12d38c/txglu74hAoRrQw91rESrD.png
+  urls:
+    - https://huggingface.co/zerofata/L3.3-Genetic-Lemonade-Sunset-70B
+    - https://huggingface.co/mradermacher/L3.3-Genetic-Lemonade-Sunset-70B-GGUF
+  description: |
+    Inspired to learn how to merge by the Nevoria series from SteelSkull.
+    I wasn't planning to release any more models in this series, but I wasn't fully satisfied with Unleashed or the Final version. I happened upon the below when testing merges and found myself coming back to it, so decided to publish.
+    Model Comparison
+    Designed for RP and creative writing, all three models are focused around striking a balance between writing style, creativity and intelligence.
+  overrides:
+    parameters:
+      model: L3.3-Genetic-Lemonade-Sunset-70B.Q4_K_M.gguf
+  files:
+    - filename: L3.3-Genetic-Lemonade-Sunset-70B.Q4_K_M.gguf
+      sha256: 743c11180c0c9168c0fe31a97f9d2efe0dd749c2797d749821fcb1d6932c19f7
+      uri: huggingface://mradermacher/L3.3-Genetic-Lemonade-Sunset-70B-GGUF/L3.3-Genetic-Lemonade-Sunset-70B.Q4_K_M.gguf
 - &rwkv
   url: "github:mudler/LocalAI/gallery/rwkv.yaml@master"
   name: "rwkv-6-world-7b"


@@ -39,6 +39,7 @@ type Backend interface {
     LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error)
     PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error
     GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error)
+    GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest, opts ...grpc.CallOption) (*pb.Result, error)
     TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error)
     SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error)
     AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error)


@@ -53,6 +53,10 @@ func (llm *Base) GenerateImage(*pb.GenerateImageRequest) error {
     return fmt.Errorf("unimplemented")
 }
 
+func (llm *Base) GenerateVideo(*pb.GenerateVideoRequest) error {
+    return fmt.Errorf("unimplemented")
+}
+
 func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (pb.TranscriptResult, error) {
     return pb.TranscriptResult{}, fmt.Errorf("unimplemented")
 }


@@ -215,6 +215,28 @@ func (c *Client) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest,
     return client.GenerateImage(ctx, in, opts...)
 }
 
+func (c *Client) GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest, opts ...grpc.CallOption) (*pb.Result, error) {
+    if !c.parallel {
+        c.opMutex.Lock()
+        defer c.opMutex.Unlock()
+    }
+    c.setBusy(true)
+    defer c.setBusy(false)
+    c.wdMark()
+    defer c.wdUnMark()
+    conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
+        grpc.WithDefaultCallOptions(
+            grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
+            grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
+        ))
+    if err != nil {
+        return nil, err
+    }
+    defer conn.Close()
+    client := pb.NewBackendClient(conn)
+    return client.GenerateVideo(ctx, in, opts...)
+}
+
 func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) {
     if !c.parallel {
         c.opMutex.Lock()


@@ -47,6 +47,10 @@ func (e *embedBackend) GenerateImage(ctx context.Context, in *pb.GenerateImageRe
     return e.s.GenerateImage(ctx, in)
 }
 
+func (e *embedBackend) GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest, opts ...grpc.CallOption) (*pb.Result, error) {
+    return e.s.GenerateVideo(ctx, in)
+}
+
 func (e *embedBackend) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) {
     return e.s.TTS(ctx, in)
 }


@@ -14,6 +14,7 @@ type LLM interface {
     Load(*pb.ModelOptions) error
     Embeddings(*pb.PredictOptions) ([]float32, error)
     GenerateImage(*pb.GenerateImageRequest) error
+    GenerateVideo(*pb.GenerateVideoRequest) error
     AudioTranscription(*pb.TranscriptRequest) (pb.TranscriptResult, error)
     TTS(*pb.TTSRequest) error
     SoundGeneration(*pb.SoundGenerationRequest) error


@@ -75,6 +75,18 @@ func (s *server) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest)
     return &pb.Result{Message: "Image generated", Success: true}, nil
 }
 
+func (s *server) GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest) (*pb.Result, error) {
+    if s.llm.Locking() {
+        s.llm.Lock()
+        defer s.llm.Unlock()
+    }
+    err := s.llm.GenerateVideo(in)
+    if err != nil {
+        return &pb.Result{Message: fmt.Sprintf("Error generating video: %s", err.Error()), Success: false}, err
+    }
+    return &pb.Result{Message: "Video generated", Success: true}, nil
+}
+
 func (s *server) TTS(ctx context.Context, in *pb.TTSRequest) (*pb.Result, error) {
     if s.llm.Locking() {
         s.llm.Lock()


@@ -812,6 +812,30 @@ const docTemplate = `{
                     }
                 }
             }
+        },
+        "/video": {
+            "post": {
+                "summary": "Creates a video given a prompt.",
+                "parameters": [
+                    {
+                        "description": "query params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIResponse"
+                        }
+                    }
+                }
+            }
         }
     },
     "definitions": {


@@ -805,6 +805,30 @@
                     }
                 }
             }
+        },
+        "/video": {
+            "post": {
+                "summary": "Creates a video given a prompt.",
+                "parameters": [
+                    {
+                        "description": "query params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIResponse"
+                        }
+                    }
+                }
+            }
         }
     },
     "definitions": {


@@ -1248,6 +1248,21 @@ paths:
       schema:
         $ref: '#/definitions/proto.VADResponse'
       summary: Detect voice fragments in an audio stream
+  /video:
+    post:
+      parameters:
+      - description: query params
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/schema.OpenAIRequest'
+      responses:
+        "200":
+          description: Response
+          schema:
+            $ref: '#/definitions/schema.OpenAIResponse'
+      summary: Creates a video given a prompt.
 securityDefinitions:
   BearerAuth:
     in: header