ci: add GPU tests (#1095)

* ci: test GPU
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: show logs
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Debug
* debug
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* split extra/core images
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* split extra/core images
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* consider runner host dir
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 02:24:59 +00:00 · 2023-10-19 13:50:40 +02:00 · 2023-10-19 13:50:40 +02:00 · 432513c3ba
commit 432513c3ba
parent 45370c212b
6 changed files with 242 additions and 30 deletions
--- a/tests/e2e-fixtures/gpu.yaml
+++ b/tests/e2e-fixtures/gpu.yaml
@ -0,0 +1,17 @@
+context_size: 2048
+mirostat: 2
+mirostat_tau: 5.0
+mirostat_eta: 0.1
+f16: true
+threads: 1
+gpu_layers: 90
+name: gpt-4
+mmap: true
+parameters:
+  model: ggllm-test-model.bin
+  rope_freq_base: 10000 
+  max_tokens: 20
+  rope_freq_scale: 1
+  temperature: 0.2
+  top_k: 40
+  top_p: 0.95
--- a/tests/e2e/e2e_suite_test.go
+++ b/tests/e2e/e2e_suite_test.go
@ -0,0 +1,18 @@
+package e2e_test
+
+import (
+	"os"
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var (
+	// localAIURL is the base URL the e2e clients are pointed at. It is read
+	// from the LOCALAI_API environment variable when the package is
+	// initialized, and is the empty string if that variable is unset.
+	localAIURL = os.Getenv("LOCALAI_API")
+)
+
+// TestLocalAI is the `go test` entry point for this package. It registers
+// Ginkgo's Fail as the Gomega failure handler and then runs the Ginkgo specs
+// under the suite name "LocalAI E2E test suite".
+func TestLocalAI(t *testing.T) {
+	// Route failed Gomega expectations into Ginkgo so they fail the current spec.
+	RegisterFailHandler(Fail)
+	RunSpecs(t, "LocalAI E2E test suite")
+}
--- a/tests/e2e/e2e_test.go
+++ b/tests/e2e/e2e_test.go
@ -0,0 +1,70 @@
+package e2e_test
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	openaigo "github.com/otiai10/openaigo"
+	"github.com/sashabaranov/go-openai"
+)
+
+// End-to-end specs that talk to an already running LocalAI instance at
+// localAIURL through OpenAI client libraries, then inspect the container logs
+// to verify which backend served the request.
+var _ = Describe("E2E test", func() {
+	// client is the go-openai client used by the specs below.
+	var client *openai.Client
+	// client2 is an openaigo client aimed at the same base URL.
+	// NOTE(review): it is configured in BeforeEach but no spec visible here uses it.
+	var client2 *openaigo.Client
+
+	Context("API with ephemeral models", func() {
+		// Build both clients against localAIURL (with an empty API key) and
+		// block until the API answers.
+		BeforeEach(func() {
+			defaultConfig := openai.DefaultConfig("")
+			defaultConfig.BaseURL = localAIURL
+
+			client2 = openaigo.NewClient("")
+			client2.BaseURL = defaultConfig.BaseURL
+
+			// Wait for API to be ready: poll ListModels until it returns no
+			// error, giving up (and failing) after the "2m" timeout.
+			client = openai.NewClientWithConfig(defaultConfig)
+			Eventually(func() error {
+				_, err := client.ListModels(context.TODO())
+				return err
+			}, "2m").ShouldNot(HaveOccurred())
+		})
+
+		// After every spec, fetch the logs of the running container(s) started
+		// from the localai-tests image and assert on backend-specific log lines:
+		// CUDA messages when BUILD_TYPE is "cublas", llama-cpp load messages
+		// otherwise.
+		AfterEach(func() {
+			// bash flags: -x traces the command, -e aborts on the first error,
+			// -c runs the given string. CombinedOutput captures stdout and stderr.
+			cmd := exec.Command("/bin/bash", "-xce", "docker logs $(docker ps -q --filter ancestor=localai-tests)")
+			out, err := cmd.CombinedOutput()
+			// The captured output is attached so a failing command is diagnosable.
+			Expect(err).ToNot(HaveOccurred(), string(out))
+			// The docker logs output captured above is checked below; which
+			// lines are expected depends on BUILD_TYPE.
+			if os.Getenv("BUILD_TYPE") == "cublas" {
+
+				// GPU build: the logs must report one CUDA device and CUDA acceleration.
+				Expect(string(out)).To(ContainSubstring("found 1 CUDA devices"), string(out))
+				Expect(string(out)).To(ContainSubstring("using CUDA for GPU acceleration"), string(out))
+			} else {
+				// Any other build type: no GPU assertions, only that the
+				// llama-cpp backend loaded a model.
+				fmt.Println("Skipping GPU check")
+				Expect(string(out)).To(ContainSubstring("[llama-cpp] Loads OK"), string(out))
+				Expect(string(out)).To(ContainSubstring("llama_model_loader"), string(out))
+			}
+		})
+
+		Context("Generates text", func() {
+			// NOTE(review): the spec name says "streams", but the call below is
+			// CreateChatCompletion and the request sets no streaming option —
+			// confirm whether a streaming call was intended or rename the spec.
+			It("streams chat tokens", func() {
+				// Presumably the model named "gpt-4" in
+				// tests/e2e-fixtures/gpu.yaml — verify against the test setup.
+				model := "gpt-4"
+				resp, err := client.CreateChatCompletion(context.TODO(),
+					openai.ChatCompletionRequest{
+						Model: model, Messages: []openai.ChatCompletionMessage{
+							{
+								Role:    "user",
+								Content: "How much is 2+2?",
+							},
+						}})
+				Expect(err).ToNot(HaveOccurred())
+				// Exactly one choice is expected, and its text must mention the
+				// answer either as a digit or spelled out.
+				Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))
+				Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(resp.Choices[0].Message.Content))
+			})
+		})
+	})
+})