feat: Realtime API support reboot (#5392)

* feat(realtime): Initial Realtime API implementation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore: go mod tidy Signed-off-by: Richard Palethorpe <io@richiejp.com> * feat: Implement transcription-only mode for realtime API Reduce the scope of the realtime API for the initial release and make transcription-only mode functional. Signed-off-by: Richard Palethorpe <io@richiejp.com> * chore(build): Build backends on a separate layer to speed up core-only changes Signed-off-by: Richard Palethorpe <io@richiejp.com> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Signed-off-by: Richard Palethorpe <io@richiejp.com> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-30 15:35:01 +00:00 · 2025-05-25 21:25:05 +01:00 · 2025-05-25 21:25:05 +01:00 · bf6426aef2
commit bf6426aef2
parent 4a91950848
18 changed files with 2953 additions and 70 deletions
--- a/backend/backend.proto
+++ b/backend/backend.proto
@ -162,6 +162,7 @@ message Reply {
  int32 prompt_tokens = 3;
  double timing_prompt_processing = 4;
  double timing_token_generation = 5;
+  bytes audio = 6;
 }

 message GrammarTrigger {
--- a/backend/go/vad/silero/vad.go
+++ b/backend/go/vad/silero/vad.go
@ -21,8 +21,8 @@ func (vad *VAD) Load(opts *pb.ModelOptions) error {
 		SampleRate: 16000,
 		//WindowSize:           1024,
 		Threshold:            0.5,
-		MinSilenceDurationMs: 0,
-		SpeechPadMs:          0,
+		MinSilenceDurationMs: 100,
+		SpeechPadMs:          30,
 	})
 	if err != nil {
 		return fmt.Errorf("create silero detector: %w", err)
@ -35,6 +35,10 @@ func (vad *VAD) Load(opts *pb.ModelOptions) error {
 func (vad *VAD) VAD(req *pb.VADRequest) (pb.VADResponse, error) {
 	audio := req.Audio

+	if err := vad.detector.Reset(); err != nil {
+		return pb.VADResponse{}, fmt.Errorf("reset: %w", err)
+	}
+
 	segments, err := vad.detector.Detect(audio)
 	if err != nil {
 		return pb.VADResponse{}, fmt.Errorf("detect: %w", err)