From fd68bf708402b3e7206f751e0254ff07df563434 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 11 Feb 2024 11:20:00 +0100 Subject: [PATCH] fix(vall-e-x): Fix voice cloning (#1696) --- backend/python/vall-e-x/ttsvalle.py | 4 ++++ docs/content/docs/features/text-to-audio.md | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/backend/python/vall-e-x/ttsvalle.py b/backend/python/vall-e-x/ttsvalle.py index d7c5d700..fc9d93bd 100644 --- a/backend/python/vall-e-x/ttsvalle.py +++ b/backend/python/vall-e-x/ttsvalle.py @@ -55,6 +55,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): print("Preparing models, please wait", file=sys.stderr) # download and load all models preload_models() + self.clonedVoice = False # Assume directory from request.ModelFile. # Only if request.LoraAdapter it's not an absolute path if request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath): @@ -65,6 +66,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if request.AudioPath != "": print("Generating model", file=sys.stderr) make_prompt(name=model_name, audio_prompt_path=request.AudioPath) + self.clonedVoice = True ### Use given transcript ##make_prompt(name=model_name, audio_prompt_path="paimon_prompt.wav", ## transcript="Just, what was that? Paimon thought we were gonna get eaten.") @@ -91,6 +93,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): try: audio_array = None if model != "": + if self.clonedVoice: + model = os.path.basename(request.model) audio_array = generate_audio(request.text, prompt=model) else: audio_array = generate_audio(request.text) diff --git a/docs/content/docs/features/text-to-audio.md b/docs/content/docs/features/text-to-audio.md index 68dfbaad..57b783ee 100644 --- a/docs/content/docs/features/text-to-audio.md +++ b/docs/content/docs/features/text-to-audio.md @@ -144,15 +144,15 @@ parameters: model: "cloned-voice" vall-e: # The path to the audio file to be cloned - # relative to the models directory - audio_path: "path-to-wav-source.wav" + # relative to the models directory + # Max 15s + audio_path: "audio-sample.wav" ``` Then you can specify the model name in the requests: ``` curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ - "backend": "vall-e-x", "model": "cloned-voice", "input":"Hello!" }' | aplay