mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-28 06:25:00 +00:00
feat: elevenlabs sound-generation
api (#3355)
* initial version of elevenlabs compatible soundgeneration api and cli command Signed-off-by: Dave Lee <dave@gray101.com> * minor cleanup Signed-off-by: Dave Lee <dave@gray101.com> * restore TTS, add test Signed-off-by: Dave Lee <dave@gray101.com> * remove stray s Signed-off-by: Dave Lee <dave@gray101.com> * fix Signed-off-by: Dave Lee <dave@gray101.com> --------- Signed-off-by: Dave Lee <dave@gray101.com> Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com> Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
This commit is contained in:
parent
84d6e5a987
commit
81ae92f017
20 changed files with 450 additions and 37 deletions
|
@ -41,6 +41,7 @@ type Backend interface {
|
|||
PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error
|
||||
GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error)
|
||||
TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error)
|
||||
Status(ctx context.Context) (*pb.StatusResponse, error)
|
||||
|
|
|
@ -61,6 +61,10 @@ func (llm *Base) TTS(*pb.TTSRequest) error {
|
|||
return fmt.Errorf("unimplemented")
|
||||
}
|
||||
|
||||
func (llm *Base) SoundGeneration(*pb.SoundGenerationRequest) error {
|
||||
return fmt.Errorf("unimplemented")
|
||||
}
|
||||
|
||||
func (llm *Base) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) {
|
||||
return pb.TokenizationResponse{}, fmt.Errorf("unimplemented")
|
||||
}
|
||||
|
|
|
@ -210,6 +210,26 @@ func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOp
|
|||
return client.TTS(ctx, in, opts...)
|
||||
}
|
||||
|
||||
func (c *Client) SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error) {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
defer c.opMutex.Unlock()
|
||||
}
|
||||
c.setBusy(true)
|
||||
defer c.setBusy(false)
|
||||
if c.wd != nil {
|
||||
c.wd.Mark(c.address)
|
||||
defer c.wd.UnMark(c.address)
|
||||
}
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer conn.Close()
|
||||
client := pb.NewBackendClient(conn)
|
||||
return client.SoundGeneration(ctx, in, opts...)
|
||||
}
|
||||
|
||||
func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
|
|
|
@ -53,6 +53,10 @@ func (e *embedBackend) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.
|
|||
return e.s.TTS(ctx, in)
|
||||
}
|
||||
|
||||
func (e *embedBackend) SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error) {
|
||||
return e.s.SoundGeneration(ctx, in)
|
||||
}
|
||||
|
||||
func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) {
|
||||
r, err := e.s.AudioTranscription(ctx, in)
|
||||
if err != nil {
|
||||
|
|
|
@ -17,6 +17,7 @@ type LLM interface {
|
|||
GenerateImage(*pb.GenerateImageRequest) error
|
||||
AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error)
|
||||
TTS(*pb.TTSRequest) error
|
||||
SoundGeneration(*pb.SoundGenerationRequest) error
|
||||
TokenizeString(*pb.PredictOptions) (pb.TokenizationResponse, error)
|
||||
Status() (pb.StatusResponse, error)
|
||||
|
||||
|
|
|
@ -84,7 +84,19 @@ func (s *server) TTS(ctx context.Context, in *pb.TTSRequest) (*pb.Result, error)
|
|||
if err != nil {
|
||||
return &pb.Result{Message: fmt.Sprintf("Error generating audio: %s", err.Error()), Success: false}, err
|
||||
}
|
||||
return &pb.Result{Message: "Audio generated", Success: true}, nil
|
||||
return &pb.Result{Message: "TTS audio generated", Success: true}, nil
|
||||
}
|
||||
|
||||
func (s *server) SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest) (*pb.Result, error) {
|
||||
if s.llm.Locking() {
|
||||
s.llm.Lock()
|
||||
defer s.llm.Unlock()
|
||||
}
|
||||
err := s.llm.SoundGeneration(in)
|
||||
if err != nil {
|
||||
return &pb.Result{Message: fmt.Sprintf("Error generating audio: %s", err.Error()), Success: false}, err
|
||||
}
|
||||
return &pb.Result{Message: "Sound Generation audio generated", Success: true}, nil
|
||||
}
|
||||
|
||||
func (s *server) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest) (*pb.TranscriptResult, error) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue