feat(tts): add Elevenlabs and OpenAI TTS compatibility layer (#1834)

* feat(elevenlabs): map elevenlabs API support to TTS

  This allows elevenlabs clients to work automatically with LocalAI by supporting the elevenlabs API. The elevenlabs server endpoint is implemented so that it is wired to the TTS endpoints.

  Fixes: https://github.com/mudler/LocalAI/issues/1809

* feat(openai/tts): compat layer with openai tts

  Fixes: #1276

* fix: adapt tts CLI
2025-05-20 10:35:01 +00:00 · 2024-03-14 23:08:34 +01:00 · 2024-03-14 23:08:34 +01:00 · 20136ca8b7
commit 20136ca8b7
parent 45d520f913
24 changed files with 454 additions and 338 deletions
--- a/core/backend/tts.go
+++ b/core/backend/tts.go
@ -29,7 +29,7 @@ func generateUniqueFileName(dir, baseName, ext string) string {
 	}
 }

-func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (string, *proto.Result, error) {
+func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (string, *proto.Result, error) {
 	bb := backend
 	if bb == "" {
 		bb = model.PiperBackend
@ -44,12 +44,12 @@ func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, appCon
 		model.WithAssetDir(appConfig.AssetsDestination),
 		model.WithLoadGRPCLoadModelOpts(grpcOpts),
 	})
-	piperModel, err := loader.BackendLoader(opts...)
+	ttsModel, err := loader.BackendLoader(opts...)
 	if err != nil {
 		return "", nil, err
 	}

-	if piperModel == nil {
+	if ttsModel == nil {
 		return "", nil, fmt.Errorf("could not load piper model")
 	}

@ -57,25 +57,31 @@ func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, appCon
 		return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
 	}

-	fileName := generateUniqueFileName(appConfig.AudioDir, "piper", ".wav")
+	fileName := generateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
 	filePath := filepath.Join(appConfig.AudioDir, fileName)

 	// If the model file is not empty, we pass it joined with the model path
 	modelPath := ""
 	if modelFile != "" {
-		if bb != model.TransformersMusicGen {
-			modelPath = filepath.Join(loader.ModelPath, modelFile)
-			if err := utils.VerifyPath(modelPath, appConfig.ModelPath); err != nil {
+		// If the model file is not empty, we pass it joined with the model path
+		// Checking first that it exists and is not outside ModelPath
+		// TODO: we should actually first check if the modelFile is looking like
+		// a FS path
+		mp := filepath.Join(loader.ModelPath, modelFile)
+		if _, err := os.Stat(mp); err == nil {
+			if err := utils.VerifyPath(mp, appConfig.ModelPath); err != nil {
 				return "", nil, err
 			}
+			modelPath = mp
 		} else {
 			modelPath = modelFile
 		}
 	}

-	res, err := piperModel.TTS(context.Background(), &proto.TTSRequest{
+	res, err := ttsModel.TTS(context.Background(), &proto.TTSRequest{
 		Text:  text,
 		Model: modelPath,
+		Voice: voice,
 		Dst:   filePath,
 	})