mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-30 23:44:59 +00:00
feat(whisper): add translate option (#2649)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
9e6dec0bc4
commit
03b1cf51fd
7 changed files with 18 additions and 8 deletions
|
@ -11,7 +11,7 @@ import (
|
|||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
|
||||
func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
|
||||
|
||||
opts := modelOpts(backendConfig, appConfig, []model.Option{
|
||||
model.WithBackendString(model.WhisperBackend),
|
||||
|
@ -31,8 +31,9 @@ func ModelTranscription(audio, language string, ml *model.ModelLoader, backendCo
|
|||
}
|
||||
|
||||
return whisperModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
|
||||
Dst: audio,
|
||||
Language: language,
|
||||
Threads: uint32(*backendConfig.Threads),
|
||||
Dst: audio,
|
||||
Language: language,
|
||||
Translate: translate,
|
||||
Threads: uint32(*backendConfig.Threads),
|
||||
})
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ type TranscriptCMD struct {
|
|||
Backend string `short:"b" default:"whisper" help:"Backend to run the transcription model"`
|
||||
Model string `short:"m" required:"" help:"Model name to run the TTS"`
|
||||
Language string `short:"l" help:"Language of the audio file"`
|
||||
Translate bool `short:"t" help:"Translate the transcription to english"`
|
||||
Threads int `short:"t" default:"1" help:"Number of threads used for parallel computation"`
|
||||
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
|
||||
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
|
||||
|
@ -50,7 +51,7 @@ func (t *TranscriptCMD) Run(ctx *cliContext.Context) error {
|
|||
}
|
||||
}()
|
||||
|
||||
tr, err := backend.ModelTranscription(t.Filename, t.Language, ml, c, opts)
|
||||
tr, err := backend.ModelTranscription(t.Filename, t.Language, t.Translate, ml, c, opts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -65,7 +65,7 @@ func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
|
|||
|
||||
log.Debug().Msgf("Audio file copied to: %+v", dst)
|
||||
|
||||
tr, err := backend.ModelTranscription(dst, input.Language, ml, *config, appConfig)
|
||||
tr, err := backend.ModelTranscription(dst, input.Language, input.Translate, ml, *config, appConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -8,6 +8,9 @@ type PredictionOptions struct {
|
|||
// Also part of the OpenAI official spec
|
||||
Language string `json:"language"`
|
||||
|
||||
// Only for audio transcription
|
||||
Translate bool `json:"translate"`
|
||||
|
||||
// Also part of the OpenAI official spec. use it for returning multiple results
|
||||
N int `json:"n"`
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue