ci: add GPU tests (#1095)

* ci: test GPU

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* ci: show logs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Debug

* debug

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* split extra/core images

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* split extra/core images

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* consider runner host dir

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2023-10-19 13:50:40 +02:00 committed by GitHub
parent 45370c212b
commit 432513c3ba
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 242 additions and 30 deletions

View file

@ -0,0 +1,17 @@
# Model definition used as an end-to-end test fixture; it is exposed under the
# name "gpt-4" (see `name` below).
# NOTE(review): every key in this view sits at column 0, including those that
# follow `parameters:`. Presumably the keys after `parameters:` are nested
# beneath it in the real file — confirm against the repository copy.
context_size: 2048
mirostat: 2
mirostat_tau: 5.0
mirostat_eta: 0.1
f16: true
threads: 1
# Number of layers requested for GPU offload.
gpu_layers: 90
name: gpt-4
mmap: true
parameters:
model: ggllm-test-model.bin
rope_freq_base: 10000
max_tokens: 20
rope_freq_scale: 1
temperature: 0.2
top_k: 40
top_p: 0.95

View file

@ -0,0 +1,18 @@
package e2e_test
import (
"os"
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// localAIURL holds the value of the LOCALAI_API environment variable, read
// once when the package is initialized. It is empty when the variable is unset.
var localAIURL = os.Getenv("LOCALAI_API")
// TestLocalAI is the `go test` entry point for the suite: it routes Gomega
// assertion failures to Ginkgo's Fail and then runs every registered spec.
func TestLocalAI(t *testing.T) {
	const suiteName = "LocalAI E2E test suite"

	RegisterFailHandler(Fail)
	RunSpecs(t, suiteName)
}

70
tests/e2e/e2e_test.go Normal file
View file

@ -0,0 +1,70 @@
package e2e_test
import (
"context"
"fmt"
"os"
"os/exec"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
openaigo "github.com/otiai10/openaigo"
"github.com/sashabaranov/go-openai"
)
// End-to-end specs against the API reachable at localAIURL. Each spec waits
// for the API to answer, runs, and then inspects the container logs to confirm
// which backend path was taken.
var _ = Describe("E2E test", func() {
	var (
		api    *openai.Client
		altAPI *openaigo.Client
	)

	Context("API with ephemeral models", func() {
		BeforeEach(func() {
			cfg := openai.DefaultConfig("")
			cfg.BaseURL = localAIURL
			api = openai.NewClientWithConfig(cfg)

			// A second client, built with the other OpenAI library, aimed at
			// the same base URL.
			altAPI = openaigo.NewClient("")
			altAPI.BaseURL = cfg.BaseURL

			// Hold the spec until listing models succeeds; give up after two
			// minutes.
			listModels := func() error {
				_, err := api.ListModels(context.TODO())
				return err
			}
			Eventually(listModels, "2m").ShouldNot(HaveOccurred())
		})

		// After every spec, read the logs of the container(s) selected by the
		// ancestor=localai-tests filter and look for the expected markers.
		AfterEach(func() {
			dump := exec.Command("/bin/bash", "-xce", "docker logs $(docker ps -q --filter ancestor=localai-tests)")
			raw, err := dump.CombinedOutput()
			logs := string(raw)
			Expect(err).ToNot(HaveOccurred(), logs)

			// Outside cublas builds only the generic model-load markers are
			// required.
			if os.Getenv("BUILD_TYPE") != "cublas" {
				fmt.Println("Skipping GPU check")
				Expect(logs).To(ContainSubstring("[llama-cpp] Loads OK"), logs)
				Expect(logs).To(ContainSubstring("llama_model_loader"), logs)
				return
			}

			// cublas builds must report that a CUDA device was found and used.
			Expect(logs).To(ContainSubstring("found 1 CUDA devices"), logs)
			Expect(logs).To(ContainSubstring("using CUDA for GPU acceleration"), logs)
		})

		Context("Generates text", func() {
			// NOTE(review): the spec name mentions streaming, but the request
			// goes through CreateChatCompletion and the reply is read as a
			// single response.
			It("streams chat tokens", func() {
				const modelName = "gpt-4"

				req := openai.ChatCompletionRequest{
					Model: modelName,
					Messages: []openai.ChatCompletionMessage{
						{Role: "user", Content: "How much is 2+2?"},
					},
				}
				resp, err := api.CreateChatCompletion(context.TODO(), req)
				Expect(err).ToNot(HaveOccurred())
				Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))

				// Accept the answer spelled either as a digit or as a word.
				answer := resp.Choices[0].Message.Content
				Expect(answer).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(answer))
			})
		})
	})
})