mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-20 02:24:59 +00:00
feat(llama.cpp): upgrade and use libmtmd (#5379)
Some checks are pending
Explorer deployment / build-linux (push) Waiting to run
GPU tests / ubuntu-latest (1.21.x) (push) Waiting to run
generate and publish intel docker caches / generate_caches (intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04, linux/amd64, ubuntu-latest) (push) Waiting to run
build container images / hipblas-jobs (-aio-gpu-hipblas, rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, extras, latest-gpu-hipblas-extras, latest-aio-gpu-hipblas, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -hipblas-extras) (push) Waiting to run
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, core, latest-gpu-hipblas, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas) (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-intel-f16, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, extras, latest-gpu-intel-f16-extras, latest-aio-gpu-intel-f16, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16-… (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-intel-f32, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, extras, latest-gpu-intel-f32-extras, latest-aio-gpu-intel-f32, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32-… (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-11, ubuntu:22.04, cublas, 11, 7, true, extras, latest-gpu-nvidia-cuda-11-extras, latest-aio-gpu-nvidia-cuda-11, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda11-extras) (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-12, ubuntu:22.04, cublas, 12, 0, true, extras, latest-gpu-nvidia-cuda-12-extras, latest-aio-gpu-nvidia-cuda-12, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda12-extras) (push) Waiting to run
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, core, latest-gpu-intel-f16, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16) (push) Waiting to run
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, core, latest-gpu-intel-f32, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32) (push) Waiting to run
build container images / core-image-build (-aio-cpu, ubuntu:22.04, , true, core, latest-cpu, latest-aio-cpu, --jobs=4 --output-sync=target, linux/amd64,linux/arm64, arc-runner-set, false, auto, ) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, cublas, 11, 7, true, core, latest-gpu-nvidia-cuda-12, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda11) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, cublas, 12, 0, true, core, latest-gpu-nvidia-cuda-12, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda12) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, vulkan, true, core, latest-gpu-vulkan, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -vulkan) (push) Waiting to run
build container images / gh-runner (nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, 12, 0, true, core, latest-nvidia-l4t-arm64, --jobs=4 --output-sync=target, linux/arm64, ubuntu-24.04-arm, true, false, -nvidia-l4t-arm64) (push) Waiting to run
Security Scan / tests (push) Waiting to run
Tests extras backends / tests-transformers (push) Waiting to run
Tests extras backends / tests-rerankers (push) Waiting to run
Tests extras backends / tests-diffusers (push) Waiting to run
Tests extras backends / tests-coqui (push) Waiting to run
tests / tests-linux (1.21.x) (push) Waiting to run
tests / tests-aio-container (push) Waiting to run
tests / tests-apple (1.21.x) (push) Waiting to run
Some checks are pending
Explorer deployment / build-linux (push) Waiting to run
GPU tests / ubuntu-latest (1.21.x) (push) Waiting to run
generate and publish intel docker caches / generate_caches (intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04, linux/amd64, ubuntu-latest) (push) Waiting to run
build container images / hipblas-jobs (-aio-gpu-hipblas, rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, extras, latest-gpu-hipblas-extras, latest-aio-gpu-hipblas, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -hipblas-extras) (push) Waiting to run
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, core, latest-gpu-hipblas, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas) (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-intel-f16, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, extras, latest-gpu-intel-f16-extras, latest-aio-gpu-intel-f16, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16-… (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-intel-f32, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, extras, latest-gpu-intel-f32-extras, latest-aio-gpu-intel-f32, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32-… (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-11, ubuntu:22.04, cublas, 11, 7, true, extras, latest-gpu-nvidia-cuda-11-extras, latest-aio-gpu-nvidia-cuda-11, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda11-extras) (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-12, ubuntu:22.04, cublas, 12, 0, true, extras, latest-gpu-nvidia-cuda-12-extras, latest-aio-gpu-nvidia-cuda-12, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda12-extras) (push) Waiting to run
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, core, latest-gpu-intel-f16, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16) (push) Waiting to run
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, core, latest-gpu-intel-f32, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32) (push) Waiting to run
build container images / core-image-build (-aio-cpu, ubuntu:22.04, , true, core, latest-cpu, latest-aio-cpu, --jobs=4 --output-sync=target, linux/amd64,linux/arm64, arc-runner-set, false, auto, ) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, cublas, 11, 7, true, core, latest-gpu-nvidia-cuda-12, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda11) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, cublas, 12, 0, true, core, latest-gpu-nvidia-cuda-12, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda12) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, vulkan, true, core, latest-gpu-vulkan, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -vulkan) (push) Waiting to run
build container images / gh-runner (nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, 12, 0, true, core, latest-nvidia-l4t-arm64, --jobs=4 --output-sync=target, linux/arm64, ubuntu-24.04-arm, true, false, -nvidia-l4t-arm64) (push) Waiting to run
Security Scan / tests (push) Waiting to run
Tests extras backends / tests-transformers (push) Waiting to run
Tests extras backends / tests-rerankers (push) Waiting to run
Tests extras backends / tests-diffusers (push) Waiting to run
Tests extras backends / tests-coqui (push) Waiting to run
tests / tests-linux (1.21.x) (push) Waiting to run
tests / tests-aio-container (push) Waiting to run
tests / tests-apple (1.21.x) (push) Waiting to run
* WIP
* wip
* wip
* Make it compile
* Update json.hpp
* this shouldn't be private for now
* Add logs
* Reset auto detected template
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Re-enable grammars
* This seems to be broken - 360a9c98e1 (diff-a18a8e64e12a01167d8e98fc)
[…]cccf0d4eed09d76d879L2998-L3207
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Placeholder
* Simplify image loading
* use completion type
* disable streaming
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* correctly return timings
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Remove some debug logging
* Adapt tests
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Keep header
* embedding: do not use oai type
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Sync from server.cpp
* Use utils and json directly from llama.cpp
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Sync with upstream
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* fix: copy json.hpp from the correct location
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* fix: add httplib
* sync llama.cpp
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Embeddings: set OAICOMPAT_TYPE_EMBEDDING
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* feat: sync with server.cpp by including it
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* make it darwin-compatible
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
6ef383033b
commit
6d5bde860b
8 changed files with 648 additions and 27490 deletions
|
@ -21,7 +21,8 @@ type MultimodalContent struct {
|
|||
ID int
|
||||
}
|
||||
|
||||
const DefaultMultiModalTemplate = "{{ range .Audio }}[audio-{{.ID}}]{{end}}{{ range .Images }}[img-{{.ID}}]{{end}}{{ range .Video }}[vid-{{.ID}}]{{end}}{{.Text}}"
|
||||
// https://github.com/ggml-org/llama.cpp/blob/be1d4a13db26750fac702ceb3af88ae4f39dc9f4/tools/mtmd/mtmd.h#L42
|
||||
const DefaultMultiModalTemplate = "{{ range .Audio }}[audio-{{.ID}}]{{end}}{{ range .Images }}<__image__>{{end}}{{ range .Video }}[vid-{{.ID}}]{{end}}{{.Text}}"
|
||||
|
||||
func TemplateMultiModal(templateString string, opts MultiModalOptions, text string) (string, error) {
|
||||
if templateString == "" {
|
||||
|
|
|
@ -20,7 +20,7 @@ var _ = Describe("EvaluateTemplate", func() {
|
|||
VideosInMessage: 0,
|
||||
}, "bar")
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(result).To(Equal("[img-0]bar"))
|
||||
Expect(result).To(Equal("<__image__>bar"))
|
||||
})
|
||||
|
||||
It("should handle messages with more images correctly", func() {
|
||||
|
@ -33,7 +33,7 @@ var _ = Describe("EvaluateTemplate", func() {
|
|||
VideosInMessage: 0,
|
||||
}, "bar")
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(result).To(Equal("[img-0][img-1]bar"))
|
||||
Expect(result).To(Equal("<__image__><__image__>bar"))
|
||||
})
|
||||
It("should handle messages with more images correctly", func() {
|
||||
result, err := TemplateMultiModal("", MultiModalOptions{
|
||||
|
@ -45,7 +45,7 @@ var _ = Describe("EvaluateTemplate", func() {
|
|||
VideosInMessage: 0,
|
||||
}, "bar")
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(result).To(Equal("[audio-0][img-2][img-3]bar"))
|
||||
Expect(result).To(Equal("[audio-0]<__image__><__image__>bar"))
|
||||
})
|
||||
It("should handle messages with more images correctly", func() {
|
||||
result, err := TemplateMultiModal("", MultiModalOptions{
|
||||
|
@ -57,7 +57,7 @@ var _ = Describe("EvaluateTemplate", func() {
|
|||
VideosInMessage: 0,
|
||||
}, "bar")
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(result).To(Equal("[audio-0][img-2]bar"))
|
||||
Expect(result).To(Equal("[audio-0]<__image__>bar"))
|
||||
})
|
||||
It("should handle messages with more images correctly", func() {
|
||||
result, err := TemplateMultiModal("", MultiModalOptions{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue