mirror of
https://github.com/mudler/LocalAI.git
synced 2025-06-09 04:14:59 +00:00
feat: Realtime API support reboot (#5392)
Some checks are pending
Explorer deployment / build-linux (push) Waiting to run
GPU tests / ubuntu-latest (1.21.x) (push) Waiting to run
generate and publish intel docker caches / generate_caches (intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04, linux/amd64, ubuntu-latest) (push) Waiting to run
build container images / hipblas-jobs (-aio-gpu-hipblas, rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, extras, latest-gpu-hipblas-extras, latest-aio-gpu-hipblas, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -hipblas-extras) (push) Waiting to run
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, core, latest-gpu-hipblas, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas) (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-intel-f16, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, extras, latest-gpu-intel-f16-extras, latest-aio-gpu-intel-f16, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16-… (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-intel-f32, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, extras, latest-gpu-intel-f32-extras, latest-aio-gpu-intel-f32, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32-… (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-11, ubuntu:22.04, cublas, 11, 7, true, extras, latest-gpu-nvidia-cuda-11-extras, latest-aio-gpu-nvidia-cuda-11, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda11-extras) (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-12, ubuntu:22.04, cublas, 12, 0, true, extras, latest-gpu-nvidia-cuda-12-extras, latest-aio-gpu-nvidia-cuda-12, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda12-extras) (push) Waiting to run
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, core, latest-gpu-intel-f16, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16) (push) Waiting to run
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, core, latest-gpu-intel-f32, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32) (push) Waiting to run
build container images / core-image-build (-aio-cpu, ubuntu:22.04, , true, core, latest-cpu, latest-aio-cpu, --jobs=4 --output-sync=target, linux/amd64,linux/arm64, arc-runner-set, false, auto, ) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, cublas, 11, 7, true, core, latest-gpu-nvidia-cuda-12, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda11) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, cublas, 12, 0, true, core, latest-gpu-nvidia-cuda-12, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda12) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, vulkan, true, core, latest-gpu-vulkan, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -vulkan) (push) Waiting to run
build container images / gh-runner (nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, 12, 0, true, core, latest-nvidia-l4t-arm64, --jobs=4 --output-sync=target, linux/arm64, ubuntu-24.04-arm, true, false, -nvidia-l4t-arm64) (push) Waiting to run
Security Scan / tests (push) Waiting to run
Tests extras backends / tests-transformers (push) Waiting to run
Tests extras backends / tests-rerankers (push) Waiting to run
Tests extras backends / tests-diffusers (push) Waiting to run
Tests extras backends / tests-coqui (push) Waiting to run
tests / tests-linux (1.21.x) (push) Waiting to run
tests / tests-aio-container (push) Waiting to run
tests / tests-apple (1.21.x) (push) Waiting to run
Some checks are pending
Explorer deployment / build-linux (push) Waiting to run
GPU tests / ubuntu-latest (1.21.x) (push) Waiting to run
generate and publish intel docker caches / generate_caches (intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04, linux/amd64, ubuntu-latest) (push) Waiting to run
build container images / hipblas-jobs (-aio-gpu-hipblas, rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, extras, latest-gpu-hipblas-extras, latest-aio-gpu-hipblas, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -hipblas-extras) (push) Waiting to run
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, core, latest-gpu-hipblas, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas) (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-intel-f16, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, extras, latest-gpu-intel-f16-extras, latest-aio-gpu-intel-f16, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16-… (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-intel-f32, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, extras, latest-gpu-intel-f32-extras, latest-aio-gpu-intel-f32, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32-… (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-11, ubuntu:22.04, cublas, 11, 7, true, extras, latest-gpu-nvidia-cuda-11-extras, latest-aio-gpu-nvidia-cuda-11, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda11-extras) (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-12, ubuntu:22.04, cublas, 12, 0, true, extras, latest-gpu-nvidia-cuda-12-extras, latest-aio-gpu-nvidia-cuda-12, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda12-extras) (push) Waiting to run
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, core, latest-gpu-intel-f16, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16) (push) Waiting to run
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, core, latest-gpu-intel-f32, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32) (push) Waiting to run
build container images / core-image-build (-aio-cpu, ubuntu:22.04, , true, core, latest-cpu, latest-aio-cpu, --jobs=4 --output-sync=target, linux/amd64,linux/arm64, arc-runner-set, false, auto, ) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, cublas, 11, 7, true, core, latest-gpu-nvidia-cuda-12, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda11) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, cublas, 12, 0, true, core, latest-gpu-nvidia-cuda-12, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda12) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, vulkan, true, core, latest-gpu-vulkan, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -vulkan) (push) Waiting to run
build container images / gh-runner (nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, 12, 0, true, core, latest-nvidia-l4t-arm64, --jobs=4 --output-sync=target, linux/arm64, ubuntu-24.04-arm, true, false, -nvidia-l4t-arm64) (push) Waiting to run
Security Scan / tests (push) Waiting to run
Tests extras backends / tests-transformers (push) Waiting to run
Tests extras backends / tests-rerankers (push) Waiting to run
Tests extras backends / tests-diffusers (push) Waiting to run
Tests extras backends / tests-coqui (push) Waiting to run
tests / tests-linux (1.21.x) (push) Waiting to run
tests / tests-aio-container (push) Waiting to run
tests / tests-apple (1.21.x) (push) Waiting to run
* feat(realtime): Initial Realtime API implementation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore: go mod tidy Signed-off-by: Richard Palethorpe <io@richiejp.com> * feat: Implement transcription only mode for realtime API Reduce the scope of the real time API for the initial release and make transcription only mode functional. Signed-off-by: Richard Palethorpe <io@richiejp.com> * chore(build): Build backends on a separate layer to speed up core only changes Signed-off-by: Richard Palethorpe <io@richiejp.com> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Signed-off-by: Richard Palethorpe <io@richiejp.com> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
4a91950848
commit
bf6426aef2
18 changed files with 2953 additions and 70 deletions
55
pkg/audio/audio.go
Normal file
55
pkg/audio/audio.go
Normal file
|
@ -0,0 +1,55 @@
|
|||
package audio
|
||||
|
||||
// Copied from VoxInput
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"io"
|
||||
)
|
||||
|
||||
// WAVHeader is the 44-byte header that precedes the sample data of a PCM WAV
// file. Every field has a fixed size and the fields are declared in the order
// they are serialized, so the struct can be handed directly to
// encoding/binary.
type WAVHeader struct {
	// RIFF chunk descriptor (12 bytes).
	ChunkID   [4]byte
	ChunkSize uint32
	Format    [4]byte

	// "fmt " sub-chunk: ID and size followed by a 16-byte PCM payload.
	Subchunk1ID   [4]byte
	Subchunk1Size uint32
	AudioFormat   uint16
	NumChannels   uint16
	SampleRate    uint32
	ByteRate      uint32
	BlockAlign    uint16
	BitsPerSample uint16

	// "data" sub-chunk header (8 bytes); the samples themselves follow it.
	Subchunk2ID   [4]byte
	Subchunk2Size uint32
}

// NewWAVHeader returns a header describing pcmLen bytes of mono, 16-bit PCM
// audio sampled at 16 kHz.
func NewWAVHeader(pcmLen uint32) WAVHeader {
	const (
		pcmFormat     = 1     // AudioFormat value for uncompressed PCM
		fmtPayload    = 16    // size of the "fmt " payload for PCM
		channels      = 1     // mono
		sampleRate    = 16000 // samples per second
		bitsPerSample = 16
		blockAlign    = channels * bitsPerSample / 8 // bytes per sample frame
		byteRate      = sampleRate * blockAlign

		// headerAfterChunkSize is the number of header bytes that follow the
		// ChunkSize field: the 44-byte header minus ChunkID and ChunkSize.
		headerAfterChunkSize = 36
	)

	return WAVHeader{
		ChunkID:       [4]byte{'R', 'I', 'F', 'F'},
		ChunkSize:     headerAfterChunkSize + pcmLen,
		Format:        [4]byte{'W', 'A', 'V', 'E'},
		Subchunk1ID:   [4]byte{'f', 'm', 't', ' '},
		Subchunk1Size: fmtPayload,
		AudioFormat:   pcmFormat,
		NumChannels:   channels,
		SampleRate:    sampleRate,
		ByteRate:      byteRate,
		BlockAlign:    blockAlign,
		BitsPerSample: bitsPerSample,
		Subchunk2ID:   [4]byte{'d', 'a', 't', 'a'},
		Subchunk2Size: pcmLen,
	}
}

// Write serializes the header to writer in little-endian byte order and
// returns whatever error the encoding or the underlying writer reports.
func (h *WAVHeader) Write(writer io.Writer) error {
	return binary.Write(writer, binary.LittleEndian, h)
}
|
|
@ -35,9 +35,9 @@ type Backend interface {
|
|||
IsBusy() bool
|
||||
HealthCheck(ctx context.Context) (bool, error)
|
||||
Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.EmbeddingResult, error)
|
||||
Predict(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.Reply, error)
|
||||
LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error
|
||||
Predict(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.Reply, error)
|
||||
GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
|
|
12
pkg/sound/float32.go
Normal file
12
pkg/sound/float32.go
Normal file
|
@ -0,0 +1,12 @@
|
|||
package sound
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"math"
|
||||
)
|
||||
|
||||
// BytesFloat32 decodes the first four bytes of bytes as a little-endian
// IEEE 754 single-precision value. It panics if fewer than four bytes are
// supplied, because binary.LittleEndian.Uint32 requires at least four.
func BytesFloat32(bytes []byte) float32 {
	return math.Float32frombits(binary.LittleEndian.Uint32(bytes))
}
|
90
pkg/sound/int16.go
Normal file
90
pkg/sound/int16.go
Normal file
|
@ -0,0 +1,90 @@
|
|||
package sound
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"math"
|
||||
)
|
||||
|
||||
/*
|
||||
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2024 Xbozon
|
||||
|
||||
*/
|
||||
|
||||
// CalculateRMS16 returns the root mean square of the int16 samples in buffer.
//
// An empty (or nil) buffer yields 0. Without that guard the mean would be
// computed as 0/0, and the resulting NaN would silently poison any threshold
// comparison a caller performs on the result.
func CalculateRMS16(buffer []int16) float64 {
	if len(buffer) == 0 {
		return 0
	}

	var sumSquares float64
	for _, sample := range buffer {
		// Widen to float64 before squaring so the product cannot overflow.
		val := float64(sample)
		sumSquares += val * val
	}

	return math.Sqrt(sumSquares / float64(len(buffer)))
}
|
||||
|
||||
// ResampleInt16 converts input from inputRate to outputRate samples per
// second using linear interpolation between neighbouring input samples.
//
// The result has int(len(input) * outputRate / inputRate) samples. Its final
// sample is always copied from the final input sample. A nil slice is
// returned when there is nothing meaningful to produce: the input is empty,
// either rate is not positive, or the input is too short to yield a single
// output sample at the requested ratio. Previously each of those cases
// panicked with an out-of-range index or an invalid slice length.
func ResampleInt16(input []int16, inputRate, outputRate int) []int16 {
	if len(input) == 0 || inputRate <= 0 || outputRate <= 0 {
		return nil
	}

	// Number of input samples consumed per output sample.
	ratio := float64(inputRate) / float64(outputRate)

	outputLength := int(float64(len(input)) / ratio)
	if outputLength <= 0 {
		return nil
	}

	output := make([]int16, outputLength)
	last := len(input) - 1

	// Interpolate every sample except the last, which is pinned below.
	for i := 0; i < outputLength-1; i++ {
		// Position of this output sample on the input time axis.
		pos := float64(i) * ratio

		indexBefore := int(pos)
		// Clamp so the upper neighbour never reads past the input.
		indexAfter := min(indexBefore+1, last)

		// Weight of the upper neighbour; the lower one gets 1-frac.
		frac := pos - float64(indexBefore)

		output[i] = int16((1-frac)*float64(input[indexBefore]) + frac*float64(input[indexAfter]))
	}

	// Pin the final sample to the final input sample.
	output[outputLength-1] = input[last]

	return output
}
|
||||
|
||||
// ConvertInt16ToInt returns a new slice holding each sample of input widened
// to int. The result always has the same length as input and is non-nil.
func ConvertInt16ToInt(input []int16) []int {
	widened := make([]int, len(input))
	for idx := range input {
		widened[idx] = int(input[idx])
	}
	return widened
}
|
||||
|
||||
// BytesToInt16sLE decodes bytes as a sequence of little-endian 16-bit signed
// samples, two bytes per sample. It panics if the slice has an odd length.
func BytesToInt16sLE(bytes []byte) []int16 {
	if len(bytes)&1 == 1 {
		panic("bytesToInt16sLE: input bytes slice has odd length, must be even")
	}

	samples := make([]int16, len(bytes)/2)
	for idx := range samples {
		lo, hi := bytes[2*idx], bytes[2*idx+1]
		// Assemble unsigned, then reinterpret the bit pattern as signed.
		samples[idx] = int16(uint16(lo) | uint16(hi)<<8)
	}
	return samples
}
|
||||
|
||||
// Int16toBytesLE encodes arr as little-endian bytes, two per sample, and
// returns them in a newly allocated slice of length 2*len(arr).
func Int16toBytesLE(arr []int16) []byte {
	encoded := make([]byte, 2*len(arr))
	for idx, sample := range arr {
		binary.LittleEndian.PutUint16(encoded[2*idx:], uint16(sample))
	}
	return encoded
}
|
|
@ -5,6 +5,8 @@ import (
|
|||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
|
||||
"github.com/go-audio/wav"
|
||||
)
|
||||
|
||||
func ffmpegCommand(args []string) (string, error) {
|
||||
|
@ -17,6 +19,21 @@ func ffmpegCommand(args []string) (string, error) {
|
|||
// AudioToWav converts audio to wav for transcribe.
|
||||
// TODO: use https://github.com/mccoyst/ogg?
|
||||
func AudioToWav(src, dst string) error {
|
||||
if strings.HasSuffix(src, ".wav") {
|
||||
f, err := os.Open(src)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open: %w", err)
|
||||
}
|
||||
|
||||
dec := wav.NewDecoder(f)
|
||||
dec.ReadInfo()
|
||||
f.Close()
|
||||
|
||||
if dec.BitDepth == 16 && dec.NumChans == 1 && dec.SampleRate == 16000 {
|
||||
os.Rename(src, dst)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
commandArgs := []string{"-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
|
||||
out, err := ffmpegCommand(commandArgs)
|
||||
if err != nil {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue