Mirror of https://github.com/mudler/LocalAI.git (synced 2025-05-27 22:15:00 +00:00)

feat: move other backends to grpc

This finally makes everything more consistent.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

Parent: 5dcfdbe51d
Commit: 1d0ed95a54
54 changed files with 3171 additions and 1712 deletions
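
The unifying idea of the commit: instead of dispatching over a type switch on concrete backend objects (rwkv, bloomz, langchain, and so on), the API layer now talks to every backend through one gRPC client type. A rough, runnable sketch of the resulting call shape; backendClient, infer, and fakeBackend are illustrative names, not LocalAI's real API:

package main

import (
	"context"
	"fmt"
)

// Sketch only: with every backend behind gRPC, the API layer talks to one
// client shape instead of switching on concrete backend structs.
type backendClient interface {
	Predict(ctx context.Context, prompt string) (string, error)
}

// infer is the single call site that replaces the old per-backend switch.
func infer(ctx context.Context, c backendClient, prompt string) (string, error) {
	return c.Predict(ctx, prompt)
}

// fakeBackend shows that any backend satisfying the client interface plugs in.
type fakeBackend struct{}

func (fakeBackend) Predict(ctx context.Context, prompt string) (string, error) {
	return "five", nil
}

func main() {
	out, _ := infer(context.Background(), fakeBackend{}, "Count up to five: one, two, three, four,")
	fmt.Println(out)
}

The payoff is visible throughout the diff below: the per-backend switch statements in the inference and image-generation paths collapse into a single code path.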
@@ -173,5 +173,12 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
 	app.Get("/v1/models", openai.ListModelsEndpoint(options.Loader, cm))
 	app.Get("/models", openai.ListModelsEndpoint(options.Loader, cm))
 
+	// turn off any process that was started by GRPC if the context is canceled
+	go func() {
+		<-options.Context.Done()
+		log.Debug().Msgf("Context canceled, shutting down")
+		options.Loader.StopGRPC()
+	}()
+
 	return app, nil
 }

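The hunk above ties the lifetime of spawned backend processes to the application context. A minimal runnable sketch of the same pattern, with stopBackends standing in for options.Loader.StopGRPC():

package main

import (
	"context"
	"fmt"
	"time"
)

// A goroutine parks on the app context and tears down spawned backend
// processes once the context is canceled.
func main() {
	ctx, cancel := context.WithCancel(context.Background())

	stopBackends := func() { fmt.Println("stopping gRPC backend processes") }

	go func() {
		<-ctx.Done() // blocks until cancel() or a parent deadline fires
		stopBackends()
	}()

	cancel()
	time.Sleep(100 * time.Millisecond) // demo only: give the goroutine time to run
}
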
api/api_test.go (233 changes)

@@ -5,7 +5,9 @@ import (
 	"context"
 	"embed"
 	"encoding/json"
+	"errors"
 	"fmt"
+	"io"
 	"io/ioutil"
 	"net/http"
 	"os"

@@ -24,6 +26,7 @@ import (
 
 	openaigo "github.com/otiai10/openaigo"
 	"github.com/sashabaranov/go-openai"
+	"github.com/sashabaranov/go-openai/jsonschema"
 )
 
 type modelApplyRequest struct {

@@ -203,7 +206,7 @@ var _ = Describe("API test", func() {
 				fmt.Println(response)
 				resp = response
 				return response["processed"].(bool)
-			}, "360s").Should(Equal(true))
+			}, "360s", "10s").Should(Equal(true))
 			Expect(resp["message"]).ToNot(ContainSubstring("error"))
 
 			dat, err := os.ReadFile(filepath.Join(tmpdir, "bert2.yaml"))

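A note on the recurring timeout edits in this file: Gomega's Eventually accepts an optional timeout and polling interval, so the change from }, "360s") to }, "360s", "10s") keeps the six-minute budget but re-runs the status probe only every ten seconds instead of at Gomega's much shorter default interval. The pattern, as used by these tests:

// Eventually(probe, timeout, pollingInterval)
Eventually(func() bool {
	response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
	return response["processed"].(bool)
}, "360s", "10s").Should(Equal(true))
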
@@ -245,9 +248,8 @@ var _ = Describe("API test", func() {
 
 			Eventually(func() bool {
 				response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
 				fmt.Println(response)
 				return response["processed"].(bool)
-			}, "360s").Should(Equal(true))
-
+			}, "360s", "10s").Should(Equal(true))
 			dat, err := os.ReadFile(filepath.Join(tmpdir, "bert.yaml"))
 			Expect(err).ToNot(HaveOccurred())

@@ -270,9 +272,8 @@ var _ = Describe("API test", func() {
 
 			Eventually(func() bool {
 				response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
 				fmt.Println(response)
 				return response["processed"].(bool)
-			}, "360s").Should(Equal(true))
-
+			}, "360s", "10s").Should(Equal(true))
 			dat, err := os.ReadFile(filepath.Join(tmpdir, "bert.yaml"))
 			Expect(err).ToNot(HaveOccurred())

@@ -299,14 +300,58 @@ var _ = Describe("API test", func() {
 
 			Eventually(func() bool {
 				response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
 				fmt.Println(response)
 				return response["processed"].(bool)
-			}, "360s").Should(Equal(true))
+			}, "360s", "10s").Should(Equal(true))
 
+			By("testing completion")
 			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "openllama_3b", Prompt: "Count up to five: one, two, three, four, "})
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(resp.Choices)).To(Equal(1))
 			Expect(resp.Choices[0].Text).To(ContainSubstring("five"))
+
+			By("testing functions")
+			resp2, err := client.CreateChatCompletion(
+				context.TODO(),
+				openai.ChatCompletionRequest{
+					Model: "openllama_3b",
+					Messages: []openai.ChatCompletionMessage{
+						{
+							Role:    "user",
+							Content: "What is the weather like in San Francisco (celsius)?",
+						},
+					},
+					Functions: []openai.FunctionDefinition{
+						openai.FunctionDefinition{
+							Name:        "get_current_weather",
+							Description: "Get the current weather",
+							Parameters: jsonschema.Definition{
+								Type: jsonschema.Object,
+								Properties: map[string]jsonschema.Definition{
+									"location": {
+										Type:        jsonschema.String,
+										Description: "The city and state, e.g. San Francisco, CA",
+									},
+									"unit": {
+										Type: jsonschema.String,
+										Enum: []string{"celcius", "fahrenheit"},
+									},
+								},
+								Required: []string{"location"},
+							},
+						},
+					},
+				})
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(resp2.Choices)).To(Equal(1))
+			Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
+			Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
+
+			var res map[string]string
+			err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res))
+			Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
+			Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
 		})
 
 		It("runs gpt4all", Label("gpt4all"), func() {

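The new functions test above drives OpenAI-style function calling end to end: the model is handed a function schema, answers with a FunctionCall whose Arguments field is a JSON object, and the test unmarshals and asserts on it. For readers new to the flow, a self-contained sketch of the client-side dispatch; the struct and getWeather below are illustrative, not part of LocalAI:

package main

import (
	"encoding/json"
	"fmt"
)

// The model returns a function name plus JSON-encoded arguments; the caller
// unmarshals the arguments and dispatches to a matching handler.
type functionCall struct {
	Name      string
	Arguments string // JSON object, e.g. {"location":"San Francisco","unit":"celcius"}
}

func getWeather(location, unit string) string {
	return fmt.Sprintf("weather in %s (%s): sunny", location, unit)
}

func dispatch(fc functionCall) (string, error) {
	var args map[string]string
	if err := json.Unmarshal([]byte(fc.Arguments), &args); err != nil {
		return "", err
	}
	switch fc.Name {
	case "get_current_weather":
		return getWeather(args["location"], args["unit"]), nil
	}
	return "", fmt.Errorf("unknown function %q", fc.Name)
}

func main() {
	out, _ := dispatch(functionCall{
		Name:      "get_current_weather",
		Arguments: `{"location":"San Francisco","unit":"celcius"}`,
	})
	fmt.Println(out)
}
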
@@ -326,15 +371,126 @@ var _ = Describe("API test", func() {
 
 			Eventually(func() bool {
 				response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
 				fmt.Println(response)
 				return response["processed"].(bool)
-			}, "360s").Should(Equal(true))
+			}, "360s", "10s").Should(Equal(true))
 
 			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-j", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "How are you?"}}})
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(resp.Choices)).To(Equal(1))
 			Expect(resp.Choices[0].Message.Content).To(ContainSubstring("well"))
 		})
 
 		})
 	})
 
+	Context("Model gallery", func() {
+		BeforeEach(func() {
+			var err error
+			tmpdir, err = os.MkdirTemp("", "")
+			Expect(err).ToNot(HaveOccurred())
+
+			modelLoader = model.NewModelLoader(tmpdir)
+			c, cancel = context.WithCancel(context.Background())
+
+			galleries := []gallery.Gallery{
+				{
+					Name: "model-gallery",
+					URL:  "https://raw.githubusercontent.com/go-skynet/model-gallery/main/index.yaml",
+				},
+			}
+
+			app, err = App(
+				options.WithContext(c),
+				options.WithAudioDir(tmpdir),
+				options.WithImageDir(tmpdir),
+				options.WithGalleries(galleries),
+				options.WithModelLoader(modelLoader),
+				options.WithBackendAssets(backendAssets),
+				options.WithBackendAssetsOutput(tmpdir),
+			)
+			Expect(err).ToNot(HaveOccurred())
+			go app.Listen("127.0.0.1:9090")
+
+			defaultConfig := openai.DefaultConfig("")
+			defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
+
+			client2 = openaigo.NewClient("")
+			client2.BaseURL = defaultConfig.BaseURL
+
+			// Wait for API to be ready
+			client = openai.NewClientWithConfig(defaultConfig)
+			Eventually(func() error {
+				_, err := client.ListModels(context.TODO())
+				return err
+			}, "2m").ShouldNot(HaveOccurred())
+		})
+
+		AfterEach(func() {
+			cancel()
+			app.Shutdown()
+			os.RemoveAll(tmpdir)
+		})
+		It("installs and is capable to run tts", Label("tts"), func() {
+			if runtime.GOOS != "linux" {
+				Skip("test supported only on linux")
+			}
+
+			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
+				ID: "model-gallery@voice-en-us-kathleen-low",
+			})
+
+			Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
+
+			uuid := response["uuid"].(string)
+
+			Eventually(func() bool {
+				response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
+				fmt.Println(response)
+				return response["processed"].(bool)
+			}, "360s", "10s").Should(Equal(true))
+
+			// An HTTP Post to the /tts endpoint should return a wav audio file
+			resp, err := http.Post("http://127.0.0.1:9090/tts", "application/json", bytes.NewBuffer([]byte(`{"input": "Hello world", "model": "en-us-kathleen-low.onnx"}`)))
+			Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
+			dat, err := io.ReadAll(resp.Body)
+			Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
+
+			Expect(resp.StatusCode).To(Equal(200), fmt.Sprint(string(dat)))
+			Expect(resp.Header.Get("Content-Type")).To(Equal("audio/x-wav"))
+		})
+		It("installs and is capable to generate images", Label("stablediffusion"), func() {
+			if runtime.GOOS != "linux" {
+				Skip("test supported only on linux")
+			}
+
+			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
+				ID: "model-gallery@stablediffusion",
+			})
+
+			Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
+
+			uuid := response["uuid"].(string)
+
+			Eventually(func() bool {
+				response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
+				fmt.Println(response)
+				return response["processed"].(bool)
+			}, "360s", "10s").Should(Equal(true))
+
+			resp, err := http.Post(
+				"http://127.0.0.1:9090/v1/images/generations",
+				"application/json",
+				bytes.NewBuffer([]byte(`{
+					"prompt": "floating hair, portrait, ((loli)), ((one girl)), cute face, hidden hands, asymmetrical bangs, beautiful detailed eyes, eye shadow, hair ornament, ribbons, bowties, buttons, pleated skirt, (((masterpiece))), ((best quality)), colorful|((part of the head)), ((((mutated hands and fingers)))), deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, Octane renderer, lowres, bad anatomy, bad hands, text",
+					"mode": 2, "seed":9000,
+					"size": "256x256", "n":2}`)))
+			// The response should contain an URL
+			Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
+			dat, err := io.ReadAll(resp.Body)
+			Expect(err).ToNot(HaveOccurred(), string(dat))
+			Expect(string(dat)).To(ContainSubstring("http://127.0.0.1:9090/"), string(dat))
+			Expect(string(dat)).To(ContainSubstring(".png"), string(dat))
+
+		})
+	})

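Both gallery tests follow the same install protocol: a POST to /models/apply returns a job uuid, and the client polls /models/jobs/<uuid> until processed is true. A standalone sketch of that loop; the "id" request field is assumed from the test's modelApplyRequest, and error handling is trimmed:

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

// applyAndWait installs a gallery model and blocks until the job finishes.
func applyAndWait(base, modelID string) error {
	body, _ := json.Marshal(map[string]string{"id": modelID})
	resp, err := http.Post(base+"/models/apply", "application/json", bytes.NewBuffer(body))
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	var job map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&job); err != nil {
		return err
	}
	uuid, _ := job["uuid"].(string)

	for i := 0; i < 36; i++ { // up to ~6 minutes, mirroring the test timeout
		r, err := http.Get(base + "/models/jobs/" + uuid)
		if err != nil {
			return err
		}
		var status map[string]interface{}
		json.NewDecoder(r.Body).Decode(&status)
		r.Body.Close()
		if processed, _ := status["processed"].(bool); processed {
			return nil
		}
		time.Sleep(10 * time.Second)
	}
	return fmt.Errorf("job %s did not finish in time", uuid)
}

func main() {
	if err := applyAndWait("http://127.0.0.1:9090", "model-gallery@stablediffusion"); err != nil {
		fmt.Println(err)
	}
}
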
@@ -401,7 +557,7 @@ var _ = Describe("API test", func() {
 		It("returns errors", func() {
 			_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
 			Expect(err).To(HaveOccurred())
-			Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 11 errors occurred:"))
+			Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 12 errors occurred:"))
		})
 		It("transcribes audio", func() {
 			if runtime.GOOS != "linux" {

@@ -446,14 +602,67 @@ var _ = Describe("API test", func() {
 		})
 
 		Context("backends", func() {
-			It("runs rwkv", func() {
+			It("runs rwkv completion", func() {
 				if runtime.GOOS != "linux" {
 					Skip("test supported only on linux")
 				}
 				resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,"})
 				Expect(err).ToNot(HaveOccurred())
 				Expect(len(resp.Choices) > 0).To(BeTrue())
-				Expect(resp.Choices[0].Text).To(Equal(" five."))
+				Expect(resp.Choices[0].Text).To(ContainSubstring("five"))
+
+				stream, err := client.CreateCompletionStream(context.TODO(), openai.CompletionRequest{
+					Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,", Stream: true,
+				})
+				Expect(err).ToNot(HaveOccurred())
+				defer stream.Close()
+
+				tokens := 0
+				text := ""
+				for {
+					response, err := stream.Recv()
+					if errors.Is(err, io.EOF) {
+						break
+					}
+
+					Expect(err).ToNot(HaveOccurred())
+					text += response.Choices[0].Text
+					tokens++
+				}
+				Expect(text).ToNot(BeEmpty())
+				Expect(text).To(ContainSubstring("five"))
+				Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
+			})
+			It("runs rwkv chat completion", func() {
+				if runtime.GOOS != "linux" {
+					Skip("test supported only on linux")
+				}
+				resp, err := client.CreateChatCompletion(context.TODO(),
+					openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
+				Expect(err).ToNot(HaveOccurred())
+				Expect(len(resp.Choices) > 0).To(BeTrue())
+				Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("Sure"), ContainSubstring("five")))
+
+				stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
+				Expect(err).ToNot(HaveOccurred())
+				defer stream.Close()
+
+				tokens := 0
+				text := ""
+				for {
+					response, err := stream.Recv()
+					if errors.Is(err, io.EOF) {
+						break
+					}
+
+					Expect(err).ToNot(HaveOccurred())
+					text += response.Choices[0].Delta.Content
+					tokens++
+				}
+				Expect(text).ToNot(BeEmpty())
+				Expect(text).To(Or(ContainSubstring("Sure"), ContainSubstring("five")))
+
+				Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
 			})
 		})
 	})

@@ -1,7 +1,6 @@
 package backend
 
 import (
-	"context"
 	"fmt"
 	"sync"
 

@@ -9,7 +8,6 @@ import (
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/pkg/grpc"
 	model "github.com/go-skynet/LocalAI/pkg/model"
-	bert "github.com/go-skynet/go-bert.cpp"
 )
 
 func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.Config, o *options.Option) (func() ([]float32, error), error) {

@@ -25,10 +23,11 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.Config, o *options.Option) (func() ([]float32, error), error) {
 	var err error
 
 	opts := []model.Option{
-		model.WithLoadGRPCOpts(grpcOpts),
+		model.WithLoadGRPCLLMModelOpts(grpcOpts),
 		model.WithThreads(uint32(c.Threads)),
 		model.WithAssetDir(o.AssetsDestination),
 		model.WithModelFile(modelFile),
+		model.WithContext(o.Context),
 	}
 
 	if c.Backend == "" {

@@ -54,7 +53,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.Config, o *options.Option) (func() ([]float32, error), error) {
 			}
 			predictOptions.EmbeddingTokens = embeds
 
-			res, err := model.Embeddings(context.TODO(), predictOptions)
+			res, err := model.Embeddings(o.Context, predictOptions)
 			if err != nil {
 				return nil, err
 			}

@@ -63,22 +62,13 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.Config, o *options.Option) (func() ([]float32, error), error) {
 			}
 			predictOptions.Embeddings = s
 
-			res, err := model.Embeddings(context.TODO(), predictOptions)
+			res, err := model.Embeddings(o.Context, predictOptions)
 			if err != nil {
 				return nil, err
 			}
 
 			return res.Embeddings, nil
 		}
-
-	// bert embeddings
-	case *bert.Bert:
-		fn = func() ([]float32, error) {
-			if len(tokens) > 0 {
-				return model.TokenEmbeddings(tokens, bert.SetThreads(c.Threads))
-			}
-			return model.Embeddings(s, bert.SetThreads(c.Threads))
-		}
 	default:
 		fn = func() ([]float32, error) {
 			return nil, fmt.Errorf("embeddings not supported by the backend")

@@ -87,7 +77,15 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.Config, o *options.Option) (func() ([]float32, error), error) {
 
 	return func() ([]float32, error) {
 		// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
-		l := Lock(modelFile)
+		mutexMap.Lock()
+		l, ok := mutexes[modelFile]
+		if !ok {
+			m := &sync.Mutex{}
+			mutexes[modelFile] = m
+			l = m
+		}
+		mutexMap.Unlock()
+		l.Lock()
 		defer l.Unlock()
 
 		embeds, err := fn()

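The embeddings hunk above swaps a Lock(modelFile) helper for an inlined per-model mutex map, guarded by a second mutex that protects the map itself. A runnable sketch of the pattern; in the real file, mutexes and mutexMap are package-level variables, and the names here simply mirror the diff:

package main

import (
	"fmt"
	"sync"
)

var (
	mutexMap sync.Mutex               // guards the map below
	mutexes  = map[string]*sync.Mutex{} // one mutex per model file, created lazily
)

func withModelLock(modelFile string, fn func()) {
	mutexMap.Lock()
	l, ok := mutexes[modelFile]
	if !ok {
		l = &sync.Mutex{}
		mutexes[modelFile] = l
	}
	mutexMap.Unlock()

	l.Lock() // serializes calls per model, not globally
	defer l.Unlock()
	fn()
}

func main() {
	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func(n int) {
			defer wg.Done()
			withModelLock("model.bin", func() { fmt.Println("inference", n) })
		}(i)
	}
	wg.Wait()
}
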
@@ -6,8 +6,8 @@ import (
 
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
+	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	model "github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/stablediffusion"
 )
 
 func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string, loader *model.ModelLoader, c config.Config, o *options.Option) (func() error, error) {

@@ -19,23 +19,27 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string, loader *model.ModelLoader, c config.Config, o *options.Option) (func() error, error) {
 		model.WithBackendString(c.Backend),
 		model.WithAssetDir(o.AssetsDestination),
 		model.WithThreads(uint32(c.Threads)),
+		model.WithContext(o.Context),
 		model.WithModelFile(c.ImageGenerationAssets),
 	)
 	if err != nil {
 		return nil, err
 	}
 
-	var fn func() error
-	switch model := inferenceModel.(type) {
-	case *stablediffusion.StableDiffusion:
-		fn = func() error {
-			return model.GenerateImage(height, width, mode, step, seed, positive_prompt, negative_prompt, dst)
-		}
-
-	default:
-		fn = func() error {
-			return fmt.Errorf("creation of images not supported by the backend")
-		}
+	fn := func() error {
+		_, err := inferenceModel.GenerateImage(
+			o.Context,
+			&proto.GenerateImageRequest{
+				Height:         int32(height),
+				Width:          int32(width),
+				Mode:           int32(mode),
+				Step:           int32(step),
+				Seed:           int32(seed),
+				PositivePrompt: positive_prompt,
+				NegativePrompt: negative_prompt,
+				Dst:            dst,
+			})
+		return err
 	}
 
 	return func() error {

@@ -1,34 +1,30 @@
 package backend
 
 import (
-	"context"
 	"regexp"
 	"strings"
 	"sync"
 
-	"github.com/donomii/go-rwkv.cpp"
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/pkg/grpc"
-	"github.com/go-skynet/LocalAI/pkg/langchain"
 	model "github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/bloomz.cpp"
 )
 
 func ModelInference(s string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string) bool) (func() (string, error), error) {
-	supportStreams := false
 	modelFile := c.Model
 
 	grpcOpts := gRPCModelOpts(c)
 
-	var inferenceModel interface{}
+	var inferenceModel *grpc.Client
 	var err error
 
 	opts := []model.Option{
-		model.WithLoadGRPCOpts(grpcOpts),
-		model.WithThreads(uint32(c.Threads)), // GPT4all uses this
+		model.WithLoadGRPCLLMModelOpts(grpcOpts),
+		model.WithThreads(uint32(c.Threads)), // some models uses this to allocate threads during startup
 		model.WithAssetDir(o.AssetsDestination),
 		model.WithModelFile(modelFile),
+		model.WithContext(o.Context),
 	}
 
 	if c.Backend == "" {

@@ -41,95 +37,37 @@ func ModelInference(s string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string) bool) (func() (string, error), error) {
 		return nil, err
 	}
 
-	var fn func() (string, error)
-
-	switch model := inferenceModel.(type) {
-	case *rwkv.RwkvState:
-		supportStreams = true
-
-		fn = func() (string, error) {
-			stopWord := "\n"
-			if len(c.StopWords) > 0 {
-				stopWord = c.StopWords[0]
-			}
-
-			if err := model.ProcessInput(s); err != nil {
-				return "", err
-			}
-
-			response := model.GenerateResponse(c.Maxtokens, stopWord, float32(c.Temperature), float32(c.TopP), tokenCallback)
-
-			return response, nil
-		}
-	case *bloomz.Bloomz:
-		fn = func() (string, error) {
-			// Generate the prediction using the language model
-			predictOptions := []bloomz.PredictOption{
-				bloomz.SetTemperature(c.Temperature),
-				bloomz.SetTopP(c.TopP),
-				bloomz.SetTopK(c.TopK),
-				bloomz.SetTokens(c.Maxtokens),
-				bloomz.SetThreads(c.Threads),
-			}
-
-			if c.Seed != 0 {
-				predictOptions = append(predictOptions, bloomz.SetSeed(c.Seed))
-			}
-
-			return model.Predict(
-				s,
-				predictOptions...,
-			)
-		}
-
-	case *grpc.Client:
-		// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
-		supportStreams = true
-		fn = func() (string, error) {
-
-			opts := gRPCPredictOpts(c, loader.ModelPath)
-			opts.Prompt = s
-			if tokenCallback != nil {
-				ss := ""
-				err := model.PredictStream(context.TODO(), opts, func(s string) {
-					tokenCallback(s)
-					ss += s
-				})
-				return ss, err
-			} else {
-				reply, err := model.Predict(context.TODO(), opts)
-				return reply.Message, err
-			}
-		}
-	case *langchain.HuggingFace:
-		fn = func() (string, error) {
-
-			// Generate the prediction using the language model
-			predictOptions := []langchain.PredictOption{
-				langchain.SetModel(c.Model),
-				langchain.SetMaxTokens(c.Maxtokens),
-				langchain.SetTemperature(c.Temperature),
-				langchain.SetStopWords(c.StopWords),
-			}
-
-			pred, er := model.PredictHuggingFace(s, predictOptions...)
-			if er != nil {
-				return "", er
-			}
-			return pred.Completion, nil
-		}
-	}
+	// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
+	fn := func() (string, error) {
+		opts := gRPCPredictOpts(c, loader.ModelPath)
+		opts.Prompt = s
+		if tokenCallback != nil {
+			ss := ""
+			err := inferenceModel.PredictStream(o.Context, opts, func(s string) {
+				tokenCallback(s)
+				ss += s
+			})
+			return ss, err
+		} else {
+			reply, err := inferenceModel.Predict(o.Context, opts)
+			return reply.Message, err
+		}
+	}
 
 	return func() (string, error) {
 		// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
-		l := Lock(modelFile)
+		mutexMap.Lock()
+		l, ok := mutexes[modelFile]
+		if !ok {
+			m := &sync.Mutex{}
+			mutexes[modelFile] = m
+			l = m
+		}
+		mutexMap.Unlock()
+		l.Lock()
 		defer l.Unlock()
 
-		res, err := fn()
-		if tokenCallback != nil && !supportStreams {
-			tokenCallback(res)
-		}
-		return res, err
+		return fn()
 	}, nil
 }

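ModelInference now returns a closure that picks between streaming and unary prediction based on whether the caller supplied a token callback, as the hunk above shows. A self-contained sketch of that dispatch rule; predictor and its methods are illustrative stand-ins for LocalAI's grpc.Client:

package main

import (
	"context"
	"fmt"
)

type predictor interface {
	Predict(ctx context.Context, prompt string) (string, error)
	PredictStream(ctx context.Context, prompt string, onToken func(string)) error
}

// infer streams when a callback is provided, otherwise makes one unary call.
func infer(ctx context.Context, p predictor, prompt string, onToken func(string) bool) (string, error) {
	if onToken != nil {
		ss := ""
		err := p.PredictStream(ctx, prompt, func(tok string) {
			onToken(tok) // forward each token as it arrives
			ss += tok    // and keep the full reply for the caller
		})
		return ss, err
	}
	return p.Predict(ctx, prompt)
}

type fake struct{}

func (fake) Predict(ctx context.Context, prompt string) (string, error) { return "five", nil }
func (fake) PredictStream(ctx context.Context, prompt string, onToken func(string)) error {
	for _, t := range []string{" four,", " five"} {
		onToken(t)
	}
	return nil
}

func main() {
	out, _ := infer(context.Background(), fake{}, "count", func(t string) bool {
		fmt.Print(t)
		return true
	})
	fmt.Println("\nfull:", out)
}
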
@@ -7,34 +7,8 @@ import (
 	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
 
 	config "github.com/go-skynet/LocalAI/api/config"
-	"github.com/go-skynet/LocalAI/pkg/langchain"
-	"github.com/go-skynet/bloomz.cpp"
 )
 
-func langchainOptions(c config.Config) []langchain.PredictOption {
-	return []langchain.PredictOption{
-		langchain.SetModel(c.Model),
-		langchain.SetMaxTokens(c.Maxtokens),
-		langchain.SetTemperature(c.Temperature),
-		langchain.SetStopWords(c.StopWords),
-	}
-}
-
-func bloomzOptions(c config.Config) []bloomz.PredictOption {
-	// Generate the prediction using the language model
-	predictOptions := []bloomz.PredictOption{
-		bloomz.SetTemperature(c.Temperature),
-		bloomz.SetTopP(c.TopP),
-		bloomz.SetTopK(c.TopK),
-		bloomz.SetTokens(c.Maxtokens),
-		bloomz.SetThreads(c.Threads),
-	}
-
-	if c.Seed != 0 {
-		predictOptions = append(predictOptions, bloomz.SetSeed(c.Seed))
-	}
-	return predictOptions
-}
 func gRPCModelOpts(c config.Config) *pb.ModelOptions {
 	b := 512
 	if c.Batch != 0 {

@@ -1,6 +1,7 @@
 package localai
 
 import (
+	"context"
 	"fmt"
 	"os"
 	"path/filepath"

@@ -8,8 +9,8 @@ import (
 	config "github.com/go-skynet/LocalAI/api/config"
 
 	"github.com/go-skynet/LocalAI/api/options"
+	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	model "github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/tts"
 	"github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/gofiber/fiber/v2"
 )

@@ -47,6 +48,7 @@ func TTSEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 		piperModel, err := o.Loader.BackendLoader(
 			model.WithBackendString(model.PiperBackend),
 			model.WithModelFile(input.Model),
+			model.WithContext(o.Context),
 			model.WithAssetDir(o.AssetsDestination))
 		if err != nil {
 			return err

@@ -56,13 +58,8 @@ func TTSEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 			return fmt.Errorf("could not load piper model")
 		}
 
-		w, ok := piperModel.(*tts.Piper)
-		if !ok {
-			return fmt.Errorf("loader returned non-piper object %+v", w)
-		}
-
 		if err := os.MkdirAll(o.AudioDir, 0755); err != nil {
-			return err
+			return fmt.Errorf("failed creating audio directory: %s", err)
 		}
 
 		fileName := generateUniqueFileName(o.AudioDir, "piper", ".wav")

@@ -74,7 +71,11 @@ func TTSEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 			return err
 		}
 
-		if err := w.TTS(input.Input, modelPath, filePath); err != nil {
+		if _, err := piperModel.TTS(context.Background(), &proto.TTSRequest{
+			Text:  input.Input,
+			Model: modelPath,
+			Dst:   filePath,
+		}); err != nil {
 			return err
 		}
 

@@ -1,6 +1,7 @@
 package openai
 
 import (
+	"context"
 	"fmt"
 	"io"
 	"net/http"

@@ -8,11 +9,10 @@ import (
 	"path"
 	"path/filepath"
 
-	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
+	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	model "github.com/go-skynet/LocalAI/pkg/model"
-	whisperutil "github.com/go-skynet/LocalAI/pkg/whisper"
 
 	"github.com/gofiber/fiber/v2"
 	"github.com/rs/zerolog/log"

@@ -64,6 +64,7 @@ func TranscriptEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 		whisperModel, err := o.Loader.BackendLoader(
 			model.WithBackendString(model.WhisperBackend),
 			model.WithModelFile(config.Model),
+			model.WithContext(o.Context),
 			model.WithThreads(uint32(config.Threads)),
 			model.WithAssetDir(o.AssetsDestination))
 		if err != nil {

@@ -74,18 +75,17 @@ func TranscriptEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 			return fmt.Errorf("could not load whisper model")
 		}
 
-		w, ok := whisperModel.(whisper.Model)
-		if !ok {
-			return fmt.Errorf("loader returned non-whisper object")
-		}
-
-		tr, err := whisperutil.Transcript(w, dst, input.Language, uint(config.Threads))
+		tr, err := whisperModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
+			Dst:      dst,
+			Language: input.Language,
+			Threads:  uint32(config.Threads),
+		})
 		if err != nil {
 			return err
 		}
 
 		log.Debug().Msgf("Trascribed: %+v", tr)
 		// TODO: handle different outputs here
-		return c.Status(http.StatusOK).JSON(fiber.Map{"text": tr})
+		return c.Status(http.StatusOK).JSON(tr)
 	}
 }