TTS API improvements (#2308)

* update doc on COQUI_LANGUAGE env variable Signed-off-by: blob42 <contact@blob42.xyz> * return errors from tts gRPC backend Signed-off-by: blob42 <contact@blob42.xyz> * handle speaker_id and language in coqui TTS backend Signed-off-by: blob42 <contact@blob42.xyz> * TTS endpoint: add optional language parameter Signed-off-by: blob42 <contact@blob42.xyz> * tts fix: empty language string breaks non-multilingual models Signed-off-by: blob42 <contact@blob42.xyz> * allow tts param definition in config file - consolidate TTS options under `tts` config entry Signed-off-by: blob42 <contact@blob42.xyz> * tts: update doc Signed-off-by: blob42 <contact@blob42.xyz> --------- Signed-off-by: blob42 <contact@blob42.xyz> Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-05-20 10:35:01 +00:00 · 2024-06-01 20:26:27 +02:00 · 2024-06-01 20:26:27 +02:00 · b99182c8d4
commit b99182c8d4
parent 95c65d67f5
10 changed files with 166 additions and 78 deletions
--- a/backend/python/coqui/backend.py
+++ b/backend/python/coqui/backend.py
@ -66,7 +66,34 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

    def TTS(self, request, context):
+        """Synthesize request.text to the audio file at request.dst.
+
+        The language is taken from request.language, falling back to
+        COQUI_LANGUAGE. A non-empty request.voice is passed as the speaker
+        id for multi-speaker models; otherwise self.AudioPath is passed as
+        the speaker reference wav.
+
+        Returns:
+            backend_pb2.Result with success=True once tts_to_file returns,
+            or success=False and a message when a required language or
+            speaker is missing or synthesis raises.
+        """
        try:
-            self.tts.tts_to_file(text=request.text, speaker_wav=self.AudioPath, language=COQUI_LANGUAGE, file_path=request.dst)
+            # Protobuf string fields are "" when unset, never None, so an
+            # absent language or voice is detected by truthiness.
+            # None (not "") is passed on so non-multilingual models are not
+            # handed an empty language string.
+            lang = request.language or COQUI_LANGUAGE or None
+            if self.tts.is_multi_lingual and lang is None:
+                return backend_pb2.Result(success=False, message="Model is multi-lingual, but no language was provided")
+
+            # Multi-speaker models need either a reference wav or a speaker id.
+            if self.tts.is_multi_speaker and self.AudioPath is None and not request.voice:
+                return backend_pb2.Result(success=False, message="Model is multi-speaker, but no speaker was provided")
+
+            if self.tts.is_multi_speaker and request.voice:
+                self.tts.tts_to_file(text=request.text, speaker=request.voice, language=lang, file_path=request.dst)
+            else:
+                self.tts.tts_to_file(text=request.text, speaker_wav=self.AudioPath, language=lang, file_path=request.dst)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(success=True)