feat(tts): Implement naive response_format for tts endpoint (#4035)

Signed-off-by: n-Arno <arnaud.alcabas@gmail.com>
This commit is contained in:
Arnaud A 2024-11-02 20:13:35 +01:00 committed by GitHub
parent 57908df956
commit 65c3df392c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 61 additions and 9 deletions

View file

@ -4,6 +4,7 @@ import (
"fmt"
"os"
"os/exec"
"strings"
)
func ffmpegCommand(args []string) (string, error) {
@ -23,3 +24,32 @@ func AudioToWav(src, dst string) error {
}
return nil
}
// AudioConvert converts the wav file generated by tts to another output format.
//
// format selects the target: "opus" is written to a file with the .ogg
// extension, while "mp3", "aac" and "flac" use the format name itself as the
// extension. Any other value (including "wav" and the empty string) is treated
// as wav, in which case src is returned unchanged and ffmpegCommand is not
// called.
//
// The destination path is src with its trailing ".wav" replaced by the target
// extension. Only the suffix is touched, so a ".wav" occurring elsewhere in
// the path (e.g. in a directory name) is preserved. If src does not end in
// ".wav" the target extension is appended, which guarantees dst differs from
// src.
//
// It returns the destination path on success. On failure it returns an empty
// path and an error wrapping the ffmpegCommand error together with its output.
//
// TODO: handle pcm to have 100% parity of supported format from OpenAI
func AudioConvert(src string, format string) (string, error) {
	// compute file extension from format, default to wav
	var extension string
	switch format {
	case "opus":
		extension = ".ogg"
	case "mp3", "aac", "flac":
		extension = "." + format
	default:
		// wav (or unknown format): nothing to convert
		return src, nil
	}

	// naive conversion based on default values and target extension of file
	dst := strings.TrimSuffix(src, ".wav") + extension

	// -y overwrites an existing dst, -vn disables any video stream.
	commandArgs := []string{"-y", "-i", src, "-vn", dst}
	out, err := ffmpegCommand(commandArgs)
	if err != nil {
		return "", fmt.Errorf("error: %w out: %s", err, out)
	}
	return dst, nil
}