From 2d64269763fe49ba4673b790b04c4aa7a18efaf2 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 15 Jun 2025 14:56:52 +0200 Subject: [PATCH] feat: Add backend gallery (#5607) * feat: Add backend gallery This PR add support to manage backends as similar to models. There is now available a backend gallery which can be used to install and remove extra backends. The backend gallery can be configured similarly as a model gallery, and API calls allows to install and remove new backends in runtime, and as well during the startup phase of LocalAI. Signed-off-by: Ettore Di Giacinto * Add backends docs Signed-off-by: Ettore Di Giacinto * wip: Backend Dockerfile for python backends Signed-off-by: Ettore Di Giacinto * feat: drop extras images, build python backends separately Signed-off-by: Ettore Di Giacinto * fixup on all backends Signed-off-by: Ettore Di Giacinto * test CI Signed-off-by: Ettore Di Giacinto * Tweaks Signed-off-by: Ettore Di Giacinto * Drop old backends leftovers Signed-off-by: Ettore Di Giacinto * Fixup CI Signed-off-by: Ettore Di Giacinto * Move dockerfile upper Signed-off-by: Ettore Di Giacinto * Fix proto Signed-off-by: Ettore Di Giacinto * Feature dropped for consistency - we prefer model galleries Signed-off-by: Ettore Di Giacinto * Add missing packages in the build image Signed-off-by: Ettore Di Giacinto * exllama is ponly available on cublas Signed-off-by: Ettore Di Giacinto * pin torch on chatterbox Signed-off-by: Ettore Di Giacinto * Fixups to index Signed-off-by: Ettore Di Giacinto * CI Signed-off-by: Ettore Di Giacinto * Debug CI * Install accellerators deps Signed-off-by: Ettore Di Giacinto * Add target arch * Add cuda minor version Signed-off-by: Ettore Di Giacinto * Use self-hosted runners Signed-off-by: Ettore Di Giacinto * ci: use quay for test images Signed-off-by: Ettore Di Giacinto * fixups for vllm and chatterbox Signed-off-by: Ettore Di Giacinto * Small fixups on CI Signed-off-by: Ettore Di Giacinto * chatterbox is only 
available for nvidia Signed-off-by: Ettore Di Giacinto * Simplify CI builds Signed-off-by: Ettore Di Giacinto * Adapt test, use qwen3 Signed-off-by: Ettore Di Giacinto * chore(model gallery): add jina-reranker-v1-tiny-en-gguf Signed-off-by: Ettore Di Giacinto * fix(gguf-parser): recover from potential panics that can happen while reading ggufs with gguf-parser Signed-off-by: Ettore Di Giacinto * Use reranker from llama.cpp in AIO images Signed-off-by: Ettore Di Giacinto * Limit concurrent jobs Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto Signed-off-by: Ettore Di Giacinto --- .github/dependabot.yml | 4 - .github/workflows/image-pr.yml | 85 +--- .github/workflows/image.yml | 158 ++---- .github/workflows/image_build.yml | 20 +- .github/workflows/python_backend.yml | 457 ++++++++++++++++++ .github/workflows/python_backend_build.yml | 198 ++++++++ .github/workflows/test.yml | 2 +- Dockerfile | 83 +--- aio/cpu/rerank.yaml | 10 +- aio/gpu-8g/rerank.yaml | 10 +- aio/intel/rerank.yaml | 10 +- backend/Dockerfile.python | 123 +++++ backend/index.yaml | 295 +++++++++++ backend/python/bark/Makefile | 2 +- backend/python/bark/install.sh | 7 +- backend/python/bark/run.sh | 7 +- backend/python/bark/test.sh | 7 +- backend/python/chatterbox/Makefile | 2 +- backend/python/chatterbox/install.sh | 7 +- .../chatterbox/requirements-hipblas.txt | 4 +- .../python/chatterbox/requirements-intel.txt | 1 - backend/python/chatterbox/run.sh | 7 +- backend/python/chatterbox/test.sh | 7 +- backend/python/common/template/install.sh | 7 +- backend/python/common/template/protogen.sh | 9 +- backend/python/common/template/run.sh | 7 +- backend/python/common/template/test.sh | 7 +- backend/python/coqui/Makefile | 2 +- backend/python/coqui/install.sh | 7 +- backend/python/coqui/run.sh | 7 +- backend/python/coqui/test.sh | 7 +- backend/python/diffusers/Makefile | 2 +- backend/python/diffusers/install.sh | 7 +- backend/python/diffusers/run.sh | 7 +- 
backend/python/diffusers/test.sh | 7 +- backend/python/exllama2/Makefile | 2 +- backend/python/exllama2/install.sh | 7 +- backend/python/exllama2/run.sh | 7 +- backend/python/exllama2/test.sh | 7 +- backend/python/faster-whisper/install.sh | 7 +- backend/python/faster-whisper/protogen.sh | 9 +- backend/python/faster-whisper/run.sh | 7 +- backend/python/faster-whisper/test.sh | 7 +- backend/python/kokoro/install.sh | 7 +- backend/python/kokoro/protogen.sh | 9 +- backend/python/kokoro/run.sh | 7 +- backend/python/kokoro/test.sh | 7 +- backend/python/rerankers/Makefile | 2 +- backend/python/rerankers/install.sh | 8 +- backend/python/rerankers/run.sh | 8 +- backend/python/rerankers/test.sh | 7 +- backend/python/transformers/Makefile | 2 +- backend/python/transformers/install.sh | 7 +- backend/python/transformers/run.sh | 7 +- backend/python/transformers/test.sh | 7 +- backend/python/vllm/Makefile | 2 +- backend/python/vllm/install.sh | 8 +- backend/python/vllm/requirements-hipblas.txt | 4 +- backend/python/vllm/run.sh | 9 +- backend/python/vllm/test.sh | 8 +- core/application/startup.go | 9 + core/cli/models.go | 2 +- core/cli/run.go | 13 +- core/config/application_config.go | 31 +- core/config/backend_config.go | 2 +- core/config/backend_config_test.go | 1 - core/config/guesser.go | 22 +- core/gallery/backend_types.go | 35 ++ core/gallery/backends.go | 107 ++++ core/gallery/backends_test.go | 151 ++++++ core/gallery/gallery.go | 273 ++++------- core/gallery/gallery_suite_test.go | 7 - core/gallery/metadata_type.go | 19 + core/gallery/models.go | 188 +++++-- core/gallery/models_test.go | 14 +- core/gallery/models_types.go | 46 ++ core/gallery/op.go | 25 - core/gallery/request.go | 76 --- core/gallery/request_test.go | 2 +- core/http/app.go | 2 +- core/http/app_test.go | 173 +++---- core/http/elements/gallery.go | 334 ++++++++++++- core/http/elements/progressbar.go | 34 +- core/http/endpoints/localai/backend.go | 152 ++++++ core/http/endpoints/localai/gallery.go | 80 
+-- core/http/endpoints/localai/system.go | 3 + core/http/endpoints/localai/welcome.go | 6 +- core/http/routes/localai.go | 10 +- core/http/routes/ui.go | 333 +------------ core/http/routes/ui_backend_gallery.go | 258 ++++++++++ core/http/routes/ui_gallery.go | 282 +++++++++++ core/http/views/backends.html | 148 ++++++ core/http/views/partials/navbar.html | 6 + core/schema/backend.go | 7 + core/services/backends.go | 44 ++ core/services/gallery.go | 196 ++------ core/services/models.go | 153 ++++++ core/services/operation.go | 81 ++++ docs/content/backends.md | 118 +++++ docs/content/docs/features/text-to-audio.md | 5 - gallery/index.yaml | 23 + go.mod | 17 +- go.sum | 31 +- main.go | 3 +- pkg/model/initializers.go | 4 +- pkg/model/loader.go | 48 +- pkg/startup/backend_preload.go | 32 ++ pkg/startup/model_preload.go | 2 +- swagger/docs.go | 6 +- swagger/swagger.json | 6 +- swagger/swagger.yaml | 6 +- tests/fixtures/backend-image/Dockerfile | 4 + tests/fixtures/backend-image/run.sh | 0 tests/fixtures/backend-image/src/.keep | 4 + 114 files changed, 3996 insertions(+), 1382 deletions(-) create mode 100644 .github/workflows/python_backend.yml create mode 100644 .github/workflows/python_backend_build.yml create mode 100644 backend/Dockerfile.python create mode 100644 backend/index.yaml create mode 100644 core/gallery/backend_types.go create mode 100644 core/gallery/backends.go create mode 100644 core/gallery/backends_test.go create mode 100644 core/gallery/metadata_type.go create mode 100644 core/gallery/models_types.go delete mode 100644 core/gallery/op.go delete mode 100644 core/gallery/request.go create mode 100644 core/http/endpoints/localai/backend.go create mode 100644 core/http/routes/ui_backend_gallery.go create mode 100644 core/http/routes/ui_gallery.go create mode 100644 core/http/views/backends.html create mode 100644 core/schema/backend.go create mode 100644 core/services/backends.go create mode 100644 core/services/models.go create mode 100644 
core/services/operation.go create mode 100644 docs/content/backends.md create mode 100644 pkg/startup/backend_preload.go create mode 100644 tests/fixtures/backend-image/Dockerfile create mode 100644 tests/fixtures/backend-image/run.sh create mode 100644 tests/fixtures/backend-image/src/.keep diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 5e8f919b..cf3a252b 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -61,10 +61,6 @@ updates: directory: "/backend/python/openvoice" schedule: interval: "weekly" - - package-ecosystem: "pip" - directory: "/backend/python/parler-tts" - schedule: - interval: "weekly" - package-ecosystem: "pip" directory: "/backend/python/rerankers" schedule: diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 2d8ce440..0fa94744 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -9,13 +9,12 @@ concurrency: cancel-in-progress: true jobs: - extras-image-build: + image-build: uses: ./.github/workflows/image_build.yml with: tag-latest: ${{ matrix.tag-latest }} tag-suffix: ${{ matrix.tag-suffix }} ffmpeg: ${{ matrix.ffmpeg }} - image-type: ${{ matrix.image-type }} build-type: ${{ matrix.build-type }} cuda-major-version: ${{ matrix.cuda-major-version }} cuda-minor-version: ${{ matrix.cuda-minor-version }} @@ -36,16 +35,6 @@ jobs: fail-fast: false matrix: include: - # This is basically covered by the AIO test - # - build-type: '' - # platforms: 'linux/amd64' - # tag-latest: 'false' - # tag-suffix: '-ffmpeg' - # ffmpeg: 'true' - # image-type: 'extras' - # runs-on: 'arc-runner-set' - # base-image: "ubuntu:22.04" - # makeflags: "--jobs=3 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -53,8 +42,7 @@ jobs: tag-latest: 'false' tag-suffix: '-cublas-cuda12-ffmpeg' ffmpeg: 'true' - image-type: 'extras' - runs-on: 'arc-runner-set' + runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" makeflags: "--jobs=3 
--output-sync=target" - build-type: 'hipblas' @@ -62,10 +50,9 @@ jobs: tag-latest: 'false' tag-suffix: '-hipblas' ffmpeg: 'false' - image-type: 'extras' base-image: "rocm/dev-ubuntu-22.04:6.1" grpc-base-image: "ubuntu:22.04" - runs-on: 'arc-runner-set' + runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" - build-type: 'sycl_f16' platforms: 'linux/amd64' @@ -74,77 +61,13 @@ jobs: grpc-base-image: "ubuntu:22.04" tag-suffix: 'sycl-f16-ffmpeg' ffmpeg: 'true' - image-type: 'extras' - runs-on: 'arc-runner-set' + runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" - build-type: 'vulkan' platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-vulkan-ffmpeg-core' ffmpeg: 'true' - image-type: 'core' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" makeflags: "--jobs=4 --output-sync=target" - # core-image-build: - # uses: ./.github/workflows/image_build.yml - # with: - # tag-latest: ${{ matrix.tag-latest }} - # tag-suffix: ${{ matrix.tag-suffix }} - # ffmpeg: ${{ matrix.ffmpeg }} - # image-type: ${{ matrix.image-type }} - # build-type: ${{ matrix.build-type }} - # cuda-major-version: ${{ matrix.cuda-major-version }} - # cuda-minor-version: ${{ matrix.cuda-minor-version }} - # platforms: ${{ matrix.platforms }} - # runs-on: ${{ matrix.runs-on }} - # base-image: ${{ matrix.base-image }} - # grpc-base-image: ${{ matrix.grpc-base-image }} - # makeflags: ${{ matrix.makeflags }} - # secrets: - # dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} - # dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} - # quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} - # quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} - # strategy: - # matrix: - # include: - # - build-type: '' - # platforms: 'linux/amd64' - # tag-latest: 'false' - # tag-suffix: '-ffmpeg-core' - # ffmpeg: 'true' - # image-type: 'core' - # runs-on: 'ubuntu-latest' - # base-image: "ubuntu:22.04" - # makeflags: "--jobs=4 --output-sync=target" - # - build-type: 'sycl_f16' - # platforms: 
'linux/amd64' - # tag-latest: 'false' - # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - # grpc-base-image: "ubuntu:22.04" - # tag-suffix: 'sycl-f16-ffmpeg-core' - # ffmpeg: 'true' - # image-type: 'core' - # runs-on: 'arc-runner-set' - # makeflags: "--jobs=3 --output-sync=target" - # - build-type: 'cublas' - # cuda-major-version: "12" - # cuda-minor-version: "0" - # platforms: 'linux/amd64' - # tag-latest: 'false' - # tag-suffix: '-cublas-cuda12-ffmpeg-core' - # ffmpeg: 'true' - # image-type: 'core' - # runs-on: 'ubuntu-latest' - # base-image: "ubuntu:22.04" - # makeflags: "--jobs=4 --output-sync=target" - # - build-type: 'vulkan' - # platforms: 'linux/amd64' - # tag-latest: 'false' - # tag-suffix: '-vulkan-ffmpeg-core' - # ffmpeg: 'true' - # image-type: 'core' - # runs-on: 'ubuntu-latest' - # base-image: "ubuntu:22.04" - # makeflags: "--jobs=4 --output-sync=target" diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 01709010..f7a141e0 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -37,24 +37,9 @@ jobs: quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} strategy: - # Pushing with all jobs in parallel - # eats the bandwidth of all the nodes max-parallel: 2 matrix: include: - - build-type: 'hipblas' - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-hipblas-extras' - ffmpeg: 'true' - image-type: 'extras' - aio: "-aio-gpu-hipblas" - base-image: "rocm/dev-ubuntu-22.04:6.1" - grpc-base-image: "ubuntu:22.04" - latest-image: 'latest-gpu-hipblas-extras' - latest-image-aio: 'latest-aio-gpu-hipblas' - runs-on: 'arc-runner-set' - makeflags: "--jobs=3 --output-sync=target" - build-type: 'hipblas' platforms: 'linux/amd64' tag-latest: 'false' @@ -66,112 +51,8 @@ jobs: runs-on: 'arc-runner-set' makeflags: "--jobs=3 --output-sync=target" latest-image: 'latest-gpu-hipblas' - self-hosted-jobs: - uses: ./.github/workflows/image_build.yml - with: 
- tag-latest: ${{ matrix.tag-latest }} - tag-suffix: ${{ matrix.tag-suffix }} - ffmpeg: ${{ matrix.ffmpeg }} - image-type: ${{ matrix.image-type }} - build-type: ${{ matrix.build-type }} - cuda-major-version: ${{ matrix.cuda-major-version }} - cuda-minor-version: ${{ matrix.cuda-minor-version }} - platforms: ${{ matrix.platforms }} - runs-on: ${{ matrix.runs-on }} - base-image: ${{ matrix.base-image }} - grpc-base-image: ${{ matrix.grpc-base-image }} - aio: ${{ matrix.aio }} - makeflags: ${{ matrix.makeflags }} - latest-image: ${{ matrix.latest-image }} - latest-image-aio: ${{ matrix.latest-image-aio }} - secrets: - dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} - dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} - quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} - quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} - strategy: - # Pushing with all jobs in parallel - # eats the bandwidth of all the nodes - max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }} - matrix: - include: - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'false' - tag-suffix: '-cublas-cuda11-extras' - ffmpeg: 'true' - image-type: 'extras' - runs-on: 'arc-runner-set' - base-image: "ubuntu:22.04" - aio: "-aio-gpu-nvidia-cuda-11" - latest-image: 'latest-gpu-nvidia-cuda-11-extras' - latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11' - makeflags: "--jobs=3 --output-sync=target" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'false' - tag-suffix: '-cublas-cuda12-extras' - ffmpeg: 'true' - image-type: 'extras' - runs-on: 'arc-runner-set' - base-image: "ubuntu:22.04" - aio: "-aio-gpu-nvidia-cuda-12" - latest-image: 'latest-gpu-nvidia-cuda-12-extras' - latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12' - makeflags: "--jobs=3 --output-sync=target" - - build-type: 'sycl_f16' - platforms: 'linux/amd64' - tag-latest: 'false' - base-image: 
"quay.io/go-skynet/intel-oneapi-base:latest" - grpc-base-image: "ubuntu:22.04" - tag-suffix: '-sycl-f16-extras' - ffmpeg: 'true' - image-type: 'extras' - runs-on: 'arc-runner-set' - aio: "-aio-gpu-intel-f16" - latest-image: 'latest-gpu-intel-f16-extras' - latest-image-aio: 'latest-aio-gpu-intel-f16' - makeflags: "--jobs=3 --output-sync=target" - - build-type: 'sycl_f32' - platforms: 'linux/amd64' - tag-latest: 'false' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - grpc-base-image: "ubuntu:22.04" - tag-suffix: '-sycl-f32-extras' - ffmpeg: 'true' - image-type: 'extras' - runs-on: 'arc-runner-set' - aio: "-aio-gpu-intel-f32" - latest-image: 'latest-gpu-intel-f32-extras' - latest-image-aio: 'latest-aio-gpu-intel-f32' - makeflags: "--jobs=3 --output-sync=target" - # Core images - - build-type: 'sycl_f16' - platforms: 'linux/amd64' - tag-latest: 'false' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - grpc-base-image: "ubuntu:22.04" - tag-suffix: '-sycl-f16' - ffmpeg: 'true' - image-type: 'core' - runs-on: 'arc-runner-set' - makeflags: "--jobs=3 --output-sync=target" - latest-image: 'latest-gpu-intel-f16' - - build-type: 'sycl_f32' - platforms: 'linux/amd64' - tag-latest: 'false' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - grpc-base-image: "ubuntu:22.04" - tag-suffix: '-sycl-f32' - ffmpeg: 'true' - image-type: 'core' - runs-on: 'arc-runner-set' - makeflags: "--jobs=3 --output-sync=target" - latest-image: 'latest-gpu-intel-f32' + aio: "-aio-gpu-hipblas" + latest-image-aio: 'latest-aio-gpu-hipblas' core-image-build: uses: ./.github/workflows/image_build.yml @@ -226,7 +107,9 @@ jobs: base-image: "ubuntu:22.04" makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'false' - latest-image: 'latest-gpu-nvidia-cuda-12' + latest-image: 'latest-gpu-nvidia-cuda-11' + aio: "-aio-gpu-nvidia-cuda-11" + latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -240,6 +123,8 
@@ jobs: skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" latest-image: 'latest-gpu-nvidia-cuda-12' + aio: "-aio-gpu-nvidia-cuda-12" + latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12' - build-type: 'vulkan' platforms: 'linux/amd64' tag-latest: 'false' @@ -251,6 +136,35 @@ jobs: skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" latest-image: 'latest-gpu-vulkan' + aio: "-aio-gpu-vulkan" + latest-image-aio: 'latest-aio-gpu-vulkan' + - build-type: 'sycl_f16' + platforms: 'linux/amd64' + tag-latest: 'false' + base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + grpc-base-image: "ubuntu:22.04" + tag-suffix: '-sycl-f16' + ffmpeg: 'true' + image-type: 'core' + runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" + latest-image: 'latest-gpu-intel-f16' + aio: "-aio-gpu-intel-f16" + latest-image-aio: 'latest-aio-gpu-intel-f16' + - build-type: 'sycl_f32' + platforms: 'linux/amd64' + tag-latest: 'false' + base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + grpc-base-image: "ubuntu:22.04" + tag-suffix: '-sycl-f32' + ffmpeg: 'true' + image-type: 'core' + runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" + latest-image: 'latest-gpu-intel-f32' + aio: "-aio-gpu-intel-f32" + latest-image-aio: 'latest-aio-gpu-intel-f32' + gh-runner: uses: ./.github/workflows/image_build.yml with: diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index a84af8d0..bd7f8edd 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -53,10 +53,6 @@ on: description: 'Skip drivers by default' default: 'false' type: string - image-type: - description: 'Image type' - default: '' - type: string runs-on: description: 'Runs on' required: true @@ -159,11 +155,11 @@ jobs: uses: docker/metadata-action@v5 with: images: | - ttl.sh/localai-ci-pr-${{ github.event.number }} + quay.io/go-skynet/ci-tests tags: | - type=ref,event=branch - type=semver,pattern={{raw}} - type=sha + 
type=ref,event=branch,suffix=localai${{ github.event.number }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }} + type=semver,pattern={{raw}},suffix=localai${{ github.event.number }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }} + type=sha,suffix=localai${{ github.event.number }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }} flavor: | latest=${{ inputs.tag-latest }} suffix=${{ inputs.tag-suffix }} @@ -211,7 +207,7 @@ jobs: password: ${{ secrets.dockerPassword }} - name: Login to DockerHub - if: github.event_name != 'pull_request' + # if: github.event_name != 'pull_request' uses: docker/login-action@v3 with: registry: quay.io @@ -232,7 +228,6 @@ jobs: CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }} CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }} FFMPEG=${{ inputs.ffmpeg }} - IMAGE_TYPE=${{ inputs.image-type }} BASE_IMAGE=${{ inputs.base-image }} GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} GRPC_MAKEFLAGS=--jobs=4 --output-sync=target @@ -261,7 +256,6 @@ jobs: CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }} CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }} FFMPEG=${{ inputs.ffmpeg }} - IMAGE_TYPE=${{ inputs.image-type }} BASE_IMAGE=${{ inputs.base-image }} GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} GRPC_MAKEFLAGS=--jobs=4 --output-sync=target @@ -275,10 +269,6 @@ jobs: push: true tags: ${{ steps.meta_pull_request.outputs.tags }} labels: ${{ steps.meta_pull_request.outputs.labels }} - - name: Testing image - if: github.event_name == 'pull_request' - run: | - echo "Image is available at ttl.sh/localai-ci-pr-${{ github.event.number }}:${{ steps.meta_pull_request.outputs.version }}" >> $GITHUB_STEP_SUMMARY ## End testing image - name: Build and push AIO image if: inputs.aio != '' diff --git a/.github/workflows/python_backend.yml b/.github/workflows/python_backend.yml new file mode 
100644 index 00000000..49ce3299 --- /dev/null +++ b/.github/workflows/python_backend.yml @@ -0,0 +1,457 @@ +--- +name: 'build python backend container images' + +on: + push: + branches: + - master + tags: + - '*' + pull_request: + +concurrency: + group: ci-backends-${{ github.head_ref || github.ref }}-${{ github.repository }} + cancel-in-progress: true + +jobs: + backend-jobs: + uses: ./.github/workflows/python_backend_build.yml + with: + tag-latest: ${{ matrix.tag-latest }} + tag-suffix: ${{ matrix.tag-suffix }} + build-type: ${{ matrix.build-type }} + cuda-major-version: ${{ matrix.cuda-major-version }} + cuda-minor-version: ${{ matrix.cuda-minor-version }} + platforms: ${{ matrix.platforms }} + runs-on: ${{ matrix.runs-on }} + base-image: ${{ matrix.base-image }} + backend: ${{ matrix.backend }} + latest-image: ${{ matrix.latest-image }} + secrets: + dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} + dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} + quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} + quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} + strategy: + fail-fast: false + max-parallel: ${{ github.event_name != 'pull_request' && 6 || 4 }} + matrix: + include: + # CUDA 11 builds + - build-type: 'cublas' + cuda-major-version: "11" + cuda-minor-version: "7" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-nvidia-cuda-11-rerankers' + runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" + backend: "rerankers" + latest-image: 'latest-gpu-nvidia-cuda-11-rerankers' + - build-type: 'cublas' + cuda-major-version: "11" + cuda-minor-version: "7" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-nvidia-cuda-11-vllm' + runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" + backend: "vllm" + latest-image: 'latest-gpu-nvidia-cuda-11-vllm' + - build-type: 'cublas' + cuda-major-version: "11" + cuda-minor-version: "7" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-nvidia-cuda-11-transformers' + 
runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" + backend: "transformers" + latest-image: 'latest-gpu-nvidia-cuda-11-transformers' + - build-type: 'cublas' + cuda-major-version: "11" + cuda-minor-version: "7" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-nvidia-cuda-11-diffusers' + runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" + backend: "diffusers" + latest-image: 'latest-gpu-nvidia-cuda-11-diffusers' + # CUDA 11 additional backends + - build-type: 'cublas' + cuda-major-version: "11" + cuda-minor-version: "7" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-nvidia-cuda-11-kokoro' + runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" + backend: "kokoro" + latest-image: 'latest-gpu-nvidia-cuda-11-kokoro' + - build-type: 'cublas' + cuda-major-version: "11" + cuda-minor-version: "7" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper' + runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" + backend: "faster-whisper" + latest-image: 'latest-gpu-nvidia-cuda-11-faster-whisper' + - build-type: 'cublas' + cuda-major-version: "11" + cuda-minor-version: "7" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-nvidia-cuda-11-coqui' + runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" + backend: "coqui" + latest-image: 'latest-gpu-nvidia-cuda-11-coqui' + - build-type: 'cublas' + cuda-major-version: "11" + cuda-minor-version: "7" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-nvidia-cuda-11-bark' + runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" + backend: "bark" + latest-image: 'latest-gpu-nvidia-cuda-11-bark' + - build-type: 'cublas' + cuda-major-version: "11" + cuda-minor-version: "7" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-nvidia-cuda-11-chatterbox' + runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" + backend: "chatterbox" + latest-image: 'latest-gpu-nvidia-cuda-11-chatterbox' + # CUDA 12 
builds + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-nvidia-cuda-12-rerankers' + runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" + backend: "rerankers" + latest-image: 'latest-gpu-nvidia-cuda-12-rerankers' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-nvidia-cuda-12-vllm' + runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" + backend: "vllm" + latest-image: 'latest-gpu-nvidia-cuda-12-vllm' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-nvidia-cuda-12-transformers' + runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" + backend: "transformers" + latest-image: 'latest-gpu-nvidia-cuda-12-transformers' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-nvidia-cuda-12-diffusers' + runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" + backend: "diffusers" + latest-image: 'latest-gpu-nvidia-cuda-12-diffusers' + # CUDA 12 additional backends + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-nvidia-cuda-12-kokoro' + runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" + backend: "kokoro" + latest-image: 'latest-gpu-nvidia-cuda-12-kokoro' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper' + runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" + backend: "faster-whisper" + latest-image: 'latest-gpu-nvidia-cuda-12-faster-whisper' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 
'true' + tag-suffix: '-gpu-nvidia-cuda-12-coqui' + runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" + backend: "coqui" + latest-image: 'latest-gpu-nvidia-cuda-12-coqui' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-nvidia-cuda-12-bark' + runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" + backend: "bark" + latest-image: 'latest-gpu-nvidia-cuda-12-bark' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-nvidia-cuda-12-chatterbox' + runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" + backend: "chatterbox" + latest-image: 'latest-gpu-nvidia-cuda-12-chatterbox' + # hipblas builds + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-rocm-hipblas-rerankers' + runs-on: 'arc-runner-set' + base-image: "rocm/dev-ubuntu-22.04:6.1" + backend: "rerankers" + latest-image: 'latest-gpu-rocm-hipblas-rerankers' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-rocm-hipblas-vllm' + runs-on: 'arc-runner-set' + base-image: "rocm/dev-ubuntu-22.04:6.1" + backend: "vllm" + latest-image: 'latest-gpu-rocm-hipblas-vllm' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-rocm-hipblas-transformers' + runs-on: 'arc-runner-set' + base-image: "rocm/dev-ubuntu-22.04:6.1" + backend: "transformers" + latest-image: 'latest-gpu-rocm-hipblas-transformers' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-rocm-hipblas-diffusers' + runs-on: 'arc-runner-set' + base-image: "rocm/dev-ubuntu-22.04:6.1" + backend: "diffusers" + 
latest-image: 'latest-gpu-rocm-hipblas-diffusers' + # ROCm additional backends + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-rocm-hipblas-kokoro' + runs-on: 'arc-runner-set' + base-image: "rocm/dev-ubuntu-22.04:6.1" + backend: "kokoro" + latest-image: 'latest-gpu-rocm-hipblas-kokoro' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-rocm-hipblas-faster-whisper' + runs-on: 'arc-runner-set' + base-image: "rocm/dev-ubuntu-22.04:6.1" + backend: "faster-whisper" + latest-image: 'latest-gpu-rocm-hipblas-faster-whisper' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-rocm-hipblas-coqui' + runs-on: 'arc-runner-set' + base-image: "rocm/dev-ubuntu-22.04:6.1" + backend: "coqui" + latest-image: 'latest-gpu-rocm-hipblas-coqui' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-rocm-hipblas-bark' + runs-on: 'arc-runner-set' + base-image: "rocm/dev-ubuntu-22.04:6.1" + backend: "bark" + latest-image: 'latest-gpu-rocm-hipblas-bark' + # sycl builds + - build-type: 'sycl_f32' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-intel-sycl-f32-rerankers' + runs-on: 'arc-runner-set' + base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + backend: "rerankers" + latest-image: 'latest-gpu-intel-sycl-f32-rerankers' + - build-type: 'sycl_f16' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-intel-sycl-f16-rerankers' + runs-on: 'arc-runner-set' + base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + backend: "rerankers" + latest-image: 'latest-gpu-intel-sycl-f16-rerankers' + - 
build-type: 'sycl_f32' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-intel-sycl-f32-vllm' + runs-on: 'arc-runner-set' + base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + backend: "vllm" + latest-image: 'latest-gpu-intel-sycl-f32-vllm' + - build-type: 'sycl_f16' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-intel-sycl-f16-vllm' + runs-on: 'arc-runner-set' + base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + backend: "vllm" + latest-image: 'latest-gpu-intel-sycl-f16-vllm' + - build-type: 'sycl_f32' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-intel-sycl-f32-transformers' + runs-on: 'arc-runner-set' + base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + backend: "transformers" + latest-image: 'latest-gpu-intel-sycl-f32-transformers' + - build-type: 'sycl_f16' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-intel-sycl-f16-transformers' + runs-on: 'arc-runner-set' + base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + backend: "transformers" + latest-image: 'latest-gpu-intel-sycl-f16-transformers' + - build-type: 'sycl_f32' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-intel-sycl-f32-diffusers' + runs-on: 'arc-runner-set' + base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + backend: "diffusers" + latest-image: 'latest-gpu-intel-sycl-f32-diffusers' + # SYCL additional backends + - build-type: 'sycl_f32' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-intel-sycl-f32-kokoro' + runs-on: 'arc-runner-set' + base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + backend: "kokoro" + latest-image: 
'latest-gpu-intel-sycl-f32-kokoro' + - build-type: 'sycl_f16' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-intel-sycl-f16-kokoro' + runs-on: 'arc-runner-set' + base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + backend: "kokoro" + latest-image: 'latest-gpu-intel-sycl-f16-kokoro' + - build-type: 'sycl_f32' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-intel-sycl-f32-faster-whisper' + runs-on: 'arc-runner-set' + base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + backend: "faster-whisper" + latest-image: 'latest-gpu-intel-sycl-f32-faster-whisper' + - build-type: 'sycl_f16' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-intel-sycl-f16-faster-whisper' + runs-on: 'arc-runner-set' + base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + backend: "faster-whisper" + latest-image: 'latest-gpu-intel-sycl-f16-faster-whisper' + - build-type: 'sycl_f32' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-intel-sycl-f32-coqui' + runs-on: 'arc-runner-set' + base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + backend: "coqui" + latest-image: 'latest-gpu-intel-sycl-f32-coqui' + - build-type: 'sycl_f16' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-intel-sycl-f16-coqui' + runs-on: 'arc-runner-set' + base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + backend: "coqui" + latest-image: 'latest-gpu-intel-sycl-f16-coqui' + - build-type: 'sycl_f32' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-intel-sycl-f32-bark' + runs-on: 'arc-runner-set' + base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + backend: "bark" + latest-image: 
'latest-gpu-intel-sycl-f32-bark' + - build-type: 'sycl_f16' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'true' + tag-suffix: '-gpu-intel-sycl-f16-bark' + runs-on: 'arc-runner-set' + base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + backend: "bark" + latest-image: 'latest-gpu-intel-sycl-f16-bark' diff --git a/.github/workflows/python_backend_build.yml b/.github/workflows/python_backend_build.yml new file mode 100644 index 00000000..f2d7c762 --- /dev/null +++ b/.github/workflows/python_backend_build.yml @@ -0,0 +1,198 @@ +--- +name: 'build python backend container images (reusable)' + +on: + workflow_call: + inputs: + base-image: + description: 'Base image' + required: true + type: string + build-type: + description: 'Build type' + default: '' + type: string + cuda-major-version: + description: 'CUDA major version' + default: "12" + type: string + cuda-minor-version: + description: 'CUDA minor version' + default: "1" + type: string + platforms: + description: 'Platforms' + default: '' + type: string + tag-latest: + description: 'Tag latest' + default: '' + type: string + latest-image: + description: 'Tag latest' + default: '' + type: string + tag-suffix: + description: 'Tag suffix' + default: '' + type: string + runs-on: + description: 'Runs on' + required: true + default: '' + type: string + backend: + description: 'Backend to build' + required: true + type: string + secrets: + dockerUsername: + required: true + dockerPassword: + required: true + quayUsername: + required: true + quayPassword: + required: true + +jobs: + reusable_python_backend-build: + runs-on: ${{ inputs.runs-on }} + steps: + - name: Force Install GIT latest + run: | + sudo apt-get update \ + && sudo apt-get install -y software-properties-common \ + && sudo apt-get update \ + && sudo add-apt-repository -y ppa:git-core/ppa \ + && sudo apt-get update \ + && sudo apt-get install -y git + + - name: Checkout + uses: actions/checkout@v4 + + - name: 
Release space from worker + if: inputs.runs-on == 'ubuntu-latest' + run: | + echo "Listing top largest packages" + pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) + head -n 30 <<< "${pkgs}" + echo + df -h + echo + sudo apt-get autoremove -y + sudo apt-get clean + echo + df -h + + - name: Docker meta + id: meta + if: github.event_name != 'pull_request' + uses: docker/metadata-action@v5 + with: + images: | + quay.io/go-skynet/local-ai-backends + localai/localai-backends + tags: | + type=ref,event=branch + type=semver,pattern={{raw}} + type=sha + flavor: | + latest=${{ inputs.tag-latest }} + suffix=${{ inputs.tag-suffix }} + + - name: Docker meta for PR + id: meta_pull_request + if: github.event_name == 'pull_request' + uses: docker/metadata-action@v5 + with: + images: | + quay.io/go-skynet/ci-tests + tags: | + type=ref,event=branch,suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }} + type=semver,pattern={{raw}},suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }} + type=sha,suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }} + flavor: | + latest=${{ inputs.tag-latest }} + suffix=${{ inputs.tag-suffix }} +## End testing image + - name: Set up QEMU + uses: docker/setup-qemu-action@master + with: + platforms: all + + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@master + + - name: Login to DockerHub + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + username: ${{ secrets.dockerUsername }} + password: ${{ secrets.dockerPassword }} + + - name: Login to Quay.io + # if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + registry: 
quay.io + username: ${{ secrets.quayUsername }} + password: ${{ secrets.quayPassword }} + + - name: Build and push + uses: docker/build-push-action@v6 + if: github.event_name != 'pull_request' + with: + builder: ${{ steps.buildx.outputs.name }} + build-args: | + BUILD_TYPE=${{ inputs.build-type }} + CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }} + CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }} + BASE_IMAGE=${{ inputs.base-image }} + BACKEND=${{ inputs.backend }} + context: ./backend + file: ./backend/Dockerfile.python + cache-from: type=gha + platforms: ${{ inputs.platforms }} + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + - name: Build and push (PR) + uses: docker/build-push-action@v6 + if: github.event_name == 'pull_request' + with: + builder: ${{ steps.buildx.outputs.name }} + build-args: | + BUILD_TYPE=${{ inputs.build-type }} + CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }} + CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }} + BASE_IMAGE=${{ inputs.base-image }} + BACKEND=${{ inputs.backend }} + context: ./backend + file: ./backend/Dockerfile.python + cache-from: type=gha + platforms: ${{ inputs.platforms }} + push: true + tags: ${{ steps.meta_pull_request.outputs.tags }} + labels: ${{ steps.meta_pull_request.outputs.labels }} + + - name: Cleanup + run: | + docker builder prune -f + docker system prune --force --volumes --all + + - name: Latest tag + if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag' + run: | + docker pull localai/localai-backends:${{ steps.meta.outputs.version }} + docker tag localai/localai-backends:${{ steps.meta.outputs.version }} localai/localai-backends:${{ inputs.latest-image }} + docker push localai/localai-backends:${{ inputs.latest-image }} + docker pull quay.io/go-skynet/local-ai-backends:${{ steps.meta.outputs.version }} + docker tag quay.io/go-skynet/local-ai-backends:${{ 
steps.meta.outputs.version }} quay.io/go-skynet/local-ai-backends:${{ inputs.latest-image }} + docker push quay.io/go-skynet/local-ai-backends:${{ inputs.latest-image }} + + - name: job summary + run: | + echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 52c3f798..87f39167 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -188,7 +188,7 @@ jobs: PATH="$PATH:$HOME/go/bin" make protogen-go - name: Build images run: | - docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile . + docker build --build-arg FFMPEG=true --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile . BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio - name: Test run: | diff --git a/Dockerfile b/Dockerfile index 4aa0e4fd..c412e1b4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,9 @@ -ARG IMAGE_TYPE=extras ARG BASE_IMAGE=ubuntu:22.04 ARG GRPC_BASE_IMAGE=${BASE_IMAGE} ARG INTEL_BASE_IMAGE=${BASE_IMAGE} # The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it. 
-FROM ${BASE_IMAGE} AS requirements-core +FROM ${BASE_IMAGE} AS requirements USER root @@ -15,13 +14,12 @@ ARG TARGETARCH ARG TARGETVARIANT ENV DEBIAN_FRONTEND=noninteractive -ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh,chatterbox:/build/backend/python/chatterbox/run.sh" RUN apt-get update && \ apt-get install -y --no-install-recommends \ build-essential \ ccache \ - ca-certificates \ + ca-certificates espeak-ng \ curl libssl-dev \ git \ git-lfs \ @@ -76,38 +74,12 @@ RUN apt-get update && \ WORKDIR /build -################################### -################################### - -# The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it -FROM requirements-core AS requirements-extras - -# Install uv as a system package -RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh -ENV PATH="/root/.cargo/bin:${PATH}" - -RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - espeak-ng \ - espeak \ - python3-pip \ - python-is-python3 \ - python3-dev llvm \ - python3-venv && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* && \ - pip install --upgrade pip - -# Install grpcio-tools (the version in 22.04 is too old) -RUN pip install --user grpcio-tools==1.71.0 grpcio==1.71.0 ################################### ################################### # The requirements-drivers target is for BUILD_TYPE specific items. 
If you need to install something specific to CUDA, or specific to ROCM, it goes here. -# This target will be built on top of requirements-core or requirements-extras as retermined by the IMAGE_TYPE build-arg -FROM requirements-${IMAGE_TYPE} AS requirements-drivers +FROM requirements AS requirements-drivers ARG BUILD_TYPE ARG CUDA_MAJOR_VERSION=12 @@ -376,8 +348,6 @@ FROM requirements-drivers ARG FFMPEG ARG BUILD_TYPE ARG TARGETARCH -ARG IMAGE_TYPE=extras -ARG EXTRA_BACKENDS ARG MAKEFLAGS ENV BUILD_TYPE=${BUILD_TYPE} @@ -416,56 +386,13 @@ COPY --from=builder /build/local-ai ./ # Copy shared libraries for piper COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/ -# Change the shell to bash so we can use [[ tests below -SHELL ["/bin/bash", "-c"] -# We try to strike a balance between individual layer size (as that affects total push time) and total image size -# Splitting the backends into more groups with fewer items results in a larger image, but a smaller size for the largest layer -# Splitting the backends into fewer groups with more items results in a smaller image, but a larger size for the largest layer - -RUN if [[ ( "${IMAGE_TYPE}" == "extras ")]]; then \ - apt-get -qq -y install espeak-ng \ - ; fi - -RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ - make -C backend/python/coqui \ - ; fi && \ - if [[ ( "${EXTRA_BACKENDS}" =~ "faster-whisper" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ - make -C backend/python/faster-whisper \ - ; fi && \ - if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ - make -C backend/python/diffusers \ - ; fi && \ - if [[ ( "${EXTRA_BACKENDS}" =~ "chatterbox" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" && "${BUILD_TYPE}" = "cublas" && "${CUDA_MAJOR_VERSION}" = "12" ]]; then \ - make -C backend/python/chatterbox \ - ; fi - -RUN if [[ ( 
"${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ - make -C backend/python/kokoro \ - ; fi && \ - if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ - make -C backend/python/exllama2 \ - ; fi && \ - if [[ ( "${EXTRA_BACKENDS}" =~ "transformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ - make -C backend/python/transformers \ - ; fi - -RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ - make -C backend/python/vllm \ - ; fi && \ - if [[ ( "${EXTRA_BACKENDS}" =~ "bark" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ - make -C backend/python/bark \ - ; fi && \ - if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ - make -C backend/python/rerankers \ - ; fi - # Make sure the models directory exists -RUN mkdir -p /build/models +RUN mkdir -p /build/models /build/backends # Define the health check command HEALTHCHECK --interval=1m --timeout=10m --retries=10 \ CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1 -VOLUME /build/models +VOLUME /build/models /build/backends EXPOSE 8080 ENTRYPOINT [ "/build/entrypoint.sh" ] diff --git a/aio/cpu/rerank.yaml b/aio/cpu/rerank.yaml index b84755a8..b52dee77 100644 --- a/aio/cpu/rerank.yaml +++ b/aio/cpu/rerank.yaml @@ -1,7 +1,13 @@ name: jina-reranker-v1-base-en -backend: rerankers +reranking: true +f16: true parameters: - model: cross-encoder + model: jina-reranker-v1-tiny-en.f16.gguf + +download_files: + - filename: jina-reranker-v1-tiny-en.f16.gguf + sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407 + uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf usage: | You can test this model with curl like this: diff --git a/aio/gpu-8g/rerank.yaml b/aio/gpu-8g/rerank.yaml index b84755a8..b52dee77 100644 --- 
a/aio/gpu-8g/rerank.yaml +++ b/aio/gpu-8g/rerank.yaml @@ -1,7 +1,13 @@ name: jina-reranker-v1-base-en -backend: rerankers +reranking: true +f16: true parameters: - model: cross-encoder + model: jina-reranker-v1-tiny-en.f16.gguf + +download_files: + - filename: jina-reranker-v1-tiny-en.f16.gguf + sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407 + uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf usage: | You can test this model with curl like this: diff --git a/aio/intel/rerank.yaml b/aio/intel/rerank.yaml index b84755a8..b52dee77 100644 --- a/aio/intel/rerank.yaml +++ b/aio/intel/rerank.yaml @@ -1,7 +1,13 @@ name: jina-reranker-v1-base-en -backend: rerankers +reranking: true +f16: true parameters: - model: cross-encoder + model: jina-reranker-v1-tiny-en.f16.gguf + +download_files: + - filename: jina-reranker-v1-tiny-en.f16.gguf + sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407 + uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf usage: | You can test this model with curl like this: diff --git a/backend/Dockerfile.python b/backend/Dockerfile.python new file mode 100644 index 00000000..04e8e020 --- /dev/null +++ b/backend/Dockerfile.python @@ -0,0 +1,123 @@ +ARG BASE_IMAGE=ubuntu:22.04 + +FROM ${BASE_IMAGE} AS builder +ARG BACKEND=rerankers +ARG BUILD_TYPE +ENV BUILD_TYPE=${BUILD_TYPE} +ARG CUDA_MAJOR_VERSION +ARG CUDA_MINOR_VERSION +ARG SKIP_DRIVERS=false +ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} +ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} +ENV DEBIAN_FRONTEND=noninteractive +ARG TARGETARCH +ARG TARGETVARIANT + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential \ + ccache \ + ca-certificates \ + espeak-ng \ + curl \ + libssl-dev \ + git \ + git-lfs \ + unzip \ + upx-ucl \ + curl python3-pip \ + python-is-python3 \ + python3-dev llvm \ + python3-venv make && \ + apt-get clean && \ + rm 
-rf /var/lib/apt/lists/* && \ + pip install --upgrade pip + + +# Cuda +ENV PATH=/usr/local/cuda/bin:${PATH} + +# HipBLAS requirements +ENV PATH=/opt/rocm/bin:${PATH} + +# Vulkan requirements +RUN < 0 { - var err error - config, err = GetGalleryConfigFromURL(model.URL, basePath) - if err != nil { - return err - } - config.Description = model.Description - config.License = model.License - } else if len(model.ConfigFile) > 0 { - // TODO: is this worse than using the override method with a blank cfg yaml? - reYamlConfig, err := yaml.Marshal(model.ConfigFile) - if err != nil { - return err - } - config = Config{ - ConfigFile: string(reYamlConfig), - Description: model.Description, - License: model.License, - URLs: model.URLs, - Name: model.Name, - Files: make([]File, 0), // Real values get added below, must be blank - // Prompt Template Skipped for now - I expect in this mode that they will be delivered as files. - } - } else { - return fmt.Errorf("invalid gallery model %+v", model) - } - - installName := model.Name - if req.Name != "" { - installName = req.Name - } - - // Copy the model configuration from the request schema - config.URLs = append(config.URLs, model.URLs...) - config.Icon = model.Icon - config.Files = append(config.Files, req.AdditionalFiles...) - config.Files = append(config.Files, model.AdditionalFiles...) 
- - // TODO model.Overrides could be merged with user overrides (not defined yet) - if err := mergo.Merge(&model.Overrides, req.Overrides, mergo.WithOverride); err != nil { - return err - } - - if err := InstallModel(basePath, installName, &config, model.Overrides, downloadStatus, enforceScan); err != nil { - return err - } - - return nil - } - - models, err := AvailableGalleryModels(galleries, basePath) +func GetGalleryConfigFromURL[T any](url string, basePath string) (T, error) { + var config T + uri := downloader.URI(url) + err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error { + return yaml.Unmarshal(d, &config) + }) if err != nil { - return err + log.Error().Err(err).Str("url", url).Msg("failed to get gallery config for url") + return config, err } - - model := FindModel(models, name, basePath) - if model == nil { - return fmt.Errorf("no model found with name %q", name) - } - - return applyModel(model) + return config, nil } -func FindModel(models []*GalleryModel, name string, basePath string) *GalleryModel { - var model *GalleryModel +func ReadConfigFile[T any](filePath string) (*T, error) { + // Read the YAML file + yamlFile, err := os.ReadFile(filePath) + if err != nil { + return nil, fmt.Errorf("failed to read YAML file: %v", err) + } + + // Unmarshal YAML data into a Config struct + var config T + err = yaml.Unmarshal(yamlFile, &config) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal YAML: %v", err) + } + + return &config, nil +} + +type GalleryElement interface { + SetGallery(gallery config.Gallery) + SetInstalled(installed bool) + GetName() string + GetDescription() string + GetTags() []string + GetGallery() config.Gallery +} + +type GalleryElements[T GalleryElement] []T + +func (gm GalleryElements[T]) Search(term string) GalleryElements[T] { + var filteredModels GalleryElements[T] + + for _, m := range gm { + if strings.Contains(m.GetName(), term) || + strings.Contains(m.GetDescription(), term) || + 
strings.Contains(m.GetGallery().Name, term) || + strings.Contains(strings.Join(m.GetTags(), ","), term) { + filteredModels = append(filteredModels, m) + } + } + return filteredModels +} + +func (gm GalleryElements[T]) FindByName(name string) T { + for _, m := range gm { + if strings.EqualFold(m.GetName(), name) { + return m + } + } + var zero T + return zero +} + +func (gm GalleryElements[T]) Paginate(pageNum int, itemsNum int) GalleryElements[T] { + start := (pageNum - 1) * itemsNum + end := start + itemsNum + if start > len(gm) { + start = len(gm) + } + if end > len(gm) { + end = len(gm) + } + return gm[start:end] +} + +func FindGalleryElement[T GalleryElement](models []T, name string, basePath string) T { + var model T name = strings.ReplaceAll(name, string(os.PathSeparator), "__") if !strings.Contains(name, "@") { for _, m := range models { - if strings.EqualFold(m.Name, name) { + if strings.EqualFold(m.GetName(), name) { model = m break } } - if model == nil { - return nil - } } else { for _, m := range models { - if strings.EqualFold(name, fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name)) { + if strings.EqualFold(name, fmt.Sprintf("%s@%s", m.GetGallery().Name, m.GetName())) { model = m break } @@ -116,12 +116,28 @@ func FindModel(models []*GalleryModel, name string, basePath string) *GalleryMod // List available models // Models galleries are a list of yaml files that are hosted on a remote server (for example github). // Each yaml file contains a list of models that can be downloaded and optionally overrides to define a new model setting. 
-func AvailableGalleryModels(galleries []config.Gallery, basePath string) (GalleryModels, error) { +func AvailableGalleryModels(galleries []config.Gallery, basePath string) (GalleryElements[*GalleryModel], error) { var models []*GalleryModel // Get models from galleries for _, gallery := range galleries { - galleryModels, err := getGalleryModels(gallery, basePath) + galleryModels, err := getGalleryElements[*GalleryModel](gallery, basePath) + if err != nil { + return nil, err + } + models = append(models, galleryModels...) + } + + return models, nil +} + +// List available backends +func AvailableBackends(galleries []config.Gallery, basePath string) (GalleryElements[*GalleryBackend], error) { + var models []*GalleryBackend + + // Get models from galleries + for _, gallery := range galleries { + galleryModels, err := getGalleryElements[*GalleryBackend](gallery, basePath) if err != nil { return nil, err } @@ -146,8 +162,8 @@ func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) return refFile, err } -func getGalleryModels(gallery config.Gallery, basePath string) ([]*GalleryModel, error) { - var models []*GalleryModel = []*GalleryModel{} +func getGalleryElements[T GalleryElement](gallery config.Gallery, basePath string) ([]T, error) { + var models []T = []T{} if strings.HasSuffix(gallery.URL, ".ref") { var err error @@ -170,97 +186,16 @@ func getGalleryModels(gallery config.Gallery, basePath string) ([]*GalleryModel, // Add gallery to models for _, model := range models { - model.Gallery = gallery + model.SetGallery(gallery) // we check if the model was already installed by checking if the config file exists // TODO: (what to do if the model doesn't install a config file?) 
- if _, err := os.Stat(filepath.Join(basePath, fmt.Sprintf("%s.yaml", model.Name))); err == nil { - model.Installed = true + // TODO: This is sub-optimal now that the gallery handles both backends and models - we need to abstract this away + if _, err := os.Stat(filepath.Join(basePath, fmt.Sprintf("%s.yaml", model.GetName()))); err == nil { + model.SetInstalled(true) + } + if _, err := os.Stat(filepath.Join(basePath, model.GetName())); err == nil { + model.SetInstalled(true) } } return models, nil } - -func GetLocalModelConfiguration(basePath string, name string) (*Config, error) { - name = strings.ReplaceAll(name, string(os.PathSeparator), "__") - galleryFile := filepath.Join(basePath, galleryFileName(name)) - return ReadConfigFile(galleryFile) -} - -func DeleteModelFromSystem(basePath string, name string, additionalFiles []string) error { - // os.PathSeparator is not allowed in model names. Replace them with "__" to avoid conflicts with file paths. - name = strings.ReplaceAll(name, string(os.PathSeparator), "__") - - configFile := filepath.Join(basePath, fmt.Sprintf("%s.yaml", name)) - - galleryFile := filepath.Join(basePath, galleryFileName(name)) - - for _, f := range []string{configFile, galleryFile} { - if err := utils.VerifyPath(f, basePath); err != nil { - return fmt.Errorf("failed to verify path %s: %w", f, err) - } - } - - var err error - // Delete all the files associated to the model - // read the model config - galleryconfig, err := ReadConfigFile(galleryFile) - if err != nil { - log.Error().Err(err).Msgf("failed to read gallery file %s", configFile) - } - - var filesToRemove []string - - // Remove additional files - if galleryconfig != nil { - for _, f := range galleryconfig.Files { - fullPath := filepath.Join(basePath, f.Filename) - filesToRemove = append(filesToRemove, fullPath) - } - } - - for _, f := range additionalFiles { - fullPath := filepath.Join(filepath.Join(basePath, f)) - filesToRemove = append(filesToRemove, fullPath) - } - - 
filesToRemove = append(filesToRemove, configFile) - filesToRemove = append(filesToRemove, galleryFile) - - // skip duplicates - filesToRemove = utils.Unique(filesToRemove) - - // Removing files - for _, f := range filesToRemove { - if e := os.Remove(f); e != nil { - err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f, e)) - } - } - - return err -} - -// This is ***NEVER*** going to be perfect or finished. -// This is a BEST EFFORT function to surface known-vulnerable models to users. -func SafetyScanGalleryModels(galleries []config.Gallery, basePath string) error { - galleryModels, err := AvailableGalleryModels(galleries, basePath) - if err != nil { - return err - } - for _, gM := range galleryModels { - if gM.Installed { - err = errors.Join(err, SafetyScanGalleryModel(gM)) - } - } - return err -} - -func SafetyScanGalleryModel(galleryModel *GalleryModel) error { - for _, file := range galleryModel.AdditionalFiles { - scanResults, err := downloader.HuggingFaceScan(downloader.URI(file.URI)) - if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) { - log.Error().Str("model", galleryModel.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!") - return err - } - } - return nil -} diff --git a/core/gallery/gallery_suite_test.go b/core/gallery/gallery_suite_test.go index bf13cac9..44256bc2 100644 --- a/core/gallery/gallery_suite_test.go +++ b/core/gallery/gallery_suite_test.go @@ -1,7 +1,6 @@ package gallery_test import ( - "os" "testing" . 
"github.com/onsi/ginkgo/v2" @@ -12,9 +11,3 @@ func TestGallery(t *testing.T) { RegisterFailHandler(Fail) RunSpecs(t, "Gallery test suite") } - -var _ = BeforeSuite(func() { - if os.Getenv("FIXTURES") == "" { - Fail("FIXTURES env var not set") - } -}) diff --git a/core/gallery/metadata_type.go b/core/gallery/metadata_type.go new file mode 100644 index 00000000..f0059eab --- /dev/null +++ b/core/gallery/metadata_type.go @@ -0,0 +1,19 @@ +package gallery + +import "github.com/mudler/LocalAI/core/config" + +type Metadata struct { + URL string `json:"url,omitempty" yaml:"url,omitempty"` + Name string `json:"name,omitempty" yaml:"name,omitempty"` + Description string `json:"description,omitempty" yaml:"description,omitempty"` + License string `json:"license,omitempty" yaml:"license,omitempty"` + URLs []string `json:"urls,omitempty" yaml:"urls,omitempty"` + Icon string `json:"icon,omitempty" yaml:"icon,omitempty"` + Tags []string `json:"tags,omitempty" yaml:"tags,omitempty"` + // AdditionalFiles are used to add additional files to the model + AdditionalFiles []File `json:"files,omitempty" yaml:"files,omitempty"` + // Gallery is a reference to the gallery which contains the model + Gallery config.Gallery `json:"gallery,omitempty" yaml:"gallery,omitempty"` + // Installed is used to indicate if the model is installed or not + Installed bool `json:"installed,omitempty" yaml:"installed,omitempty"` +} diff --git a/core/gallery/models.go b/core/gallery/models.go index 58f1963a..428f5115 100644 --- a/core/gallery/models.go +++ b/core/gallery/models.go @@ -5,8 +5,10 @@ import ( "fmt" "os" "path/filepath" + "strings" "dario.cat/mergo" + "github.com/mudler/LocalAI/core/config" lconfig "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/utils" @@ -41,10 +43,10 @@ prompt_templates: content: "" */ -// Config is the model configuration which contains all the model details +// ModelConfig is the model configuration which 
contains all the model details // This configuration is read from the gallery endpoint and is used to download and install the model // It is the internal structure, separated from the request -type Config struct { +type ModelConfig struct { Description string `yaml:"description"` Icon string `yaml:"icon"` License string `yaml:"license"` @@ -66,37 +68,78 @@ type PromptTemplate struct { Content string `yaml:"content"` } -func GetGalleryConfigFromURL(url string, basePath string) (Config, error) { - var config Config - uri := downloader.URI(url) - err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error { - return yaml.Unmarshal(d, &config) - }) - if err != nil { - log.Error().Err(err).Str("url", url).Msg("failed to get gallery config for url") - return config, err +// Installs a model from the gallery +func InstallModelFromGallery(galleries []config.Gallery, name string, basePath string, req GalleryModel, downloadStatus func(string, string, string, float64), enforceScan bool) error { + + applyModel := func(model *GalleryModel) error { + name = strings.ReplaceAll(name, string(os.PathSeparator), "__") + + var config ModelConfig + + if len(model.URL) > 0 { + var err error + config, err = GetGalleryConfigFromURL[ModelConfig](model.URL, basePath) + if err != nil { + return err + } + config.Description = model.Description + config.License = model.License + } else if len(model.ConfigFile) > 0 { + // TODO: is this worse than using the override method with a blank cfg yaml? + reYamlConfig, err := yaml.Marshal(model.ConfigFile) + if err != nil { + return err + } + config = ModelConfig{ + ConfigFile: string(reYamlConfig), + Description: model.Description, + License: model.License, + URLs: model.URLs, + Name: model.Name, + Files: make([]File, 0), // Real values get added below, must be blank + // Prompt Template Skipped for now - I expect in this mode that they will be delivered as files. 
+ } + } else { + return fmt.Errorf("invalid gallery model %+v", model) + } + + installName := model.Name + if req.Name != "" { + installName = req.Name + } + + // Copy the model configuration from the request schema + config.URLs = append(config.URLs, model.URLs...) + config.Icon = model.Icon + config.Files = append(config.Files, req.AdditionalFiles...) + config.Files = append(config.Files, model.AdditionalFiles...) + + // TODO model.Overrides could be merged with user overrides (not defined yet) + if err := mergo.Merge(&model.Overrides, req.Overrides, mergo.WithOverride); err != nil { + return err + } + + if err := InstallModel(basePath, installName, &config, model.Overrides, downloadStatus, enforceScan); err != nil { + return err + } + + return nil } - return config, nil + + models, err := AvailableGalleryModels(galleries, basePath) + if err != nil { + return err + } + + model := FindGalleryElement(models, name, basePath) + if model == nil { + return fmt.Errorf("no model found with name %q", name) + } + + return applyModel(model) } -func ReadConfigFile(filePath string) (*Config, error) { - // Read the YAML file - yamlFile, err := os.ReadFile(filePath) - if err != nil { - return nil, fmt.Errorf("failed to read YAML file: %v", err) - } - - // Unmarshal YAML data into a Config struct - var config Config - err = yaml.Unmarshal(yamlFile, &config) - if err != nil { - return nil, fmt.Errorf("failed to unmarshal YAML: %v", err) - } - - return &config, nil -} - -func InstallModel(basePath, nameOverride string, config *Config, configOverrides map[string]interface{}, downloadStatus func(string, string, string, float64), enforceScan bool) error { +func InstallModel(basePath, nameOverride string, config *ModelConfig, configOverrides map[string]interface{}, downloadStatus func(string, string, string, float64), enforceScan bool) error { // Create base path if it doesn't exist err := os.MkdirAll(basePath, 0750) if err != nil { @@ -219,3 +262,88 @@ func InstallModel(basePath, 
nameOverride string, config *Config, configOverrides func galleryFileName(name string) string { return "._gallery_" + name + ".yaml" } + +func GetLocalModelConfiguration(basePath string, name string) (*ModelConfig, error) { + name = strings.ReplaceAll(name, string(os.PathSeparator), "__") + galleryFile := filepath.Join(basePath, galleryFileName(name)) + return ReadConfigFile[ModelConfig](galleryFile) +} + +func DeleteModelFromSystem(basePath string, name string, additionalFiles []string) error { + // os.PathSeparator is not allowed in model names. Replace them with "__" to avoid conflicts with file paths. + name = strings.ReplaceAll(name, string(os.PathSeparator), "__") + + configFile := filepath.Join(basePath, fmt.Sprintf("%s.yaml", name)) + + galleryFile := filepath.Join(basePath, galleryFileName(name)) + + for _, f := range []string{configFile, galleryFile} { + if err := utils.VerifyPath(f, basePath); err != nil { + return fmt.Errorf("failed to verify path %s: %w", f, err) + } + } + + var err error + // Delete all the files associated to the model + // read the model config + galleryconfig, err := ReadConfigFile[ModelConfig](galleryFile) + if err != nil { + log.Error().Err(err).Msgf("failed to read gallery file %s", configFile) + } + + var filesToRemove []string + + // Remove additional files + if galleryconfig != nil { + for _, f := range galleryconfig.Files { + fullPath := filepath.Join(basePath, f.Filename) + filesToRemove = append(filesToRemove, fullPath) + } + } + + for _, f := range additionalFiles { + fullPath := filepath.Join(filepath.Join(basePath, f)) + filesToRemove = append(filesToRemove, fullPath) + } + + filesToRemove = append(filesToRemove, configFile) + filesToRemove = append(filesToRemove, galleryFile) + + // skip duplicates + filesToRemove = utils.Unique(filesToRemove) + + // Removing files + for _, f := range filesToRemove { + if e := os.Remove(f); e != nil { + err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f, e)) + } + } + 
+ return err +} + +// This is ***NEVER*** going to be perfect or finished. +// This is a BEST EFFORT function to surface known-vulnerable models to users. +func SafetyScanGalleryModels(galleries []config.Gallery, basePath string) error { + galleryModels, err := AvailableGalleryModels(galleries, basePath) + if err != nil { + return err + } + for _, gM := range galleryModels { + if gM.Installed { + err = errors.Join(err, SafetyScanGalleryModel(gM)) + } + } + return err +} + +func SafetyScanGalleryModel(galleryModel *GalleryModel) error { + for _, file := range galleryModel.AdditionalFiles { + scanResults, err := downloader.HuggingFaceScan(downloader.URI(file.URI)) + if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) { + log.Error().Str("model", galleryModel.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!") + return err + } + } + return nil +} diff --git a/core/gallery/models_test.go b/core/gallery/models_test.go index ef4faed8..26151aa8 100644 --- a/core/gallery/models_test.go +++ b/core/gallery/models_test.go @@ -16,12 +16,18 @@ const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b8 var _ = Describe("Model test", func() { + BeforeEach(func() { + if os.Getenv("FIXTURES") == "" { + Skip("FIXTURES env var not set, skipping model tests") + } + }) + Context("Downloading", func() { It("applies model correctly", func() { tempdir, err := os.MkdirTemp("", "test") Expect(err).ToNot(HaveOccurred()) defer os.RemoveAll(tempdir) - c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml")) + c, err := ReadConfigFile[ModelConfig](filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml")) Expect(err).ToNot(HaveOccurred()) err = InstallModel(tempdir, "", c, map[string]interface{}{}, func(string, string, string, float64) {}, true) Expect(err).ToNot(HaveOccurred()) @@ -107,7 +113,7 @@ var _ = Describe("Model test", func() { tempdir, err 
:= os.MkdirTemp("", "test") Expect(err).ToNot(HaveOccurred()) defer os.RemoveAll(tempdir) - c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml")) + c, err := ReadConfigFile[ModelConfig](filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml")) Expect(err).ToNot(HaveOccurred()) err = InstallModel(tempdir, "foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true) @@ -123,7 +129,7 @@ var _ = Describe("Model test", func() { tempdir, err := os.MkdirTemp("", "test") Expect(err).ToNot(HaveOccurred()) defer os.RemoveAll(tempdir) - c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml")) + c, err := ReadConfigFile[ModelConfig](filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml")) Expect(err).ToNot(HaveOccurred()) err = InstallModel(tempdir, "foo", c, map[string]interface{}{"backend": "foo"}, func(string, string, string, float64) {}, true) @@ -149,7 +155,7 @@ var _ = Describe("Model test", func() { tempdir, err := os.MkdirTemp("", "test") Expect(err).ToNot(HaveOccurred()) defer os.RemoveAll(tempdir) - c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml")) + c, err := ReadConfigFile[ModelConfig](filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml")) Expect(err).ToNot(HaveOccurred()) err = InstallModel(tempdir, "../../../foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true) diff --git a/core/gallery/models_types.go b/core/gallery/models_types.go new file mode 100644 index 00000000..b2d3944d --- /dev/null +++ b/core/gallery/models_types.go @@ -0,0 +1,46 @@ +package gallery + +import ( + "fmt" + + "github.com/mudler/LocalAI/core/config" +) + +// GalleryModel is the struct used to represent a model in the gallery returned by the endpoint. +// It is used to install the model by resolving the URL and downloading the files. +// The other fields are used to override the configuration of the model. 
+type GalleryModel struct { + Metadata `json:",inline" yaml:",inline"` + // config_file is read in the situation where URL is blank - and therefore this is a base config. + ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"` + // Overrides are used to override the configuration of the model located at URL + Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"` +} + +func (m *GalleryModel) SetGallery(gallery config.Gallery) { + m.Gallery = gallery +} + +func (m *GalleryModel) SetInstalled(installed bool) { + m.Installed = installed +} + +func (m *GalleryModel) GetName() string { + return m.Name +} + +func (m *GalleryModel) GetGallery() config.Gallery { + return m.Gallery +} + +func (m GalleryModel) ID() string { + return fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name) +} + +func (m *GalleryModel) GetTags() []string { + return m.Tags +} + +func (m *GalleryModel) GetDescription() string { + return m.Description +} diff --git a/core/gallery/op.go b/core/gallery/op.go deleted file mode 100644 index d3795a00..00000000 --- a/core/gallery/op.go +++ /dev/null @@ -1,25 +0,0 @@ -package gallery - -import "github.com/mudler/LocalAI/core/config" - -type GalleryOp struct { - Id string - GalleryModelName string - ConfigURL string - Delete bool - - Req GalleryModel - Galleries []config.Gallery -} - -type GalleryOpStatus struct { - Deletion bool `json:"deletion"` // Deletion is true if the operation is a deletion - FileName string `json:"file_name"` - Error error `json:"error"` - Processed bool `json:"processed"` - Message string `json:"message"` - Progress float64 `json:"progress"` - TotalFileSize string `json:"file_size"` - DownloadedFileSize string `json:"downloaded_size"` - GalleryModelName string `json:"gallery_model_name"` -} diff --git a/core/gallery/request.go b/core/gallery/request.go deleted file mode 100644 index 5e7308fd..00000000 --- a/core/gallery/request.go +++ /dev/null @@ -1,76 +0,0 @@ 
-package gallery - -import ( - "fmt" - "strings" - - "github.com/mudler/LocalAI/core/config" -) - -// GalleryModel is the struct used to represent a model in the gallery returned by the endpoint. -// It is used to install the model by resolving the URL and downloading the files. -// The other fields are used to override the configuration of the model. -type GalleryModel struct { - Metadata `json:",inline" yaml:",inline"` - // config_file is read in the situation where URL is blank - and therefore this is a base config. - ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"` - // Overrides are used to override the configuration of the model located at URL - Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"` -} - -type Metadata struct { - URL string `json:"url,omitempty" yaml:"url,omitempty"` - Name string `json:"name,omitempty" yaml:"name,omitempty"` - Description string `json:"description,omitempty" yaml:"description,omitempty"` - License string `json:"license,omitempty" yaml:"license,omitempty"` - URLs []string `json:"urls,omitempty" yaml:"urls,omitempty"` - Icon string `json:"icon,omitempty" yaml:"icon,omitempty"` - Tags []string `json:"tags,omitempty" yaml:"tags,omitempty"` - // AdditionalFiles are used to add additional files to the model - AdditionalFiles []File `json:"files,omitempty" yaml:"files,omitempty"` - // Gallery is a reference to the gallery which contains the model - Gallery config.Gallery `json:"gallery,omitempty" yaml:"gallery,omitempty"` - // Installed is used to indicate if the model is installed or not - Installed bool `json:"installed,omitempty" yaml:"installed,omitempty"` -} - -func (m GalleryModel) ID() string { - return fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name) -} - -type GalleryModels []*GalleryModel - -func (gm GalleryModels) Search(term string) GalleryModels { - var filteredModels GalleryModels - - for _, m := range gm { - if strings.Contains(m.Name, term) 
|| - strings.Contains(m.Description, term) || - strings.Contains(m.Gallery.Name, term) || - strings.Contains(strings.Join(m.Tags, ","), term) { - filteredModels = append(filteredModels, m) - } - } - return filteredModels -} - -func (gm GalleryModels) FindByName(name string) *GalleryModel { - for _, m := range gm { - if strings.EqualFold(m.Name, name) { - return m - } - } - return nil -} - -func (gm GalleryModels) Paginate(pageNum int, itemsNum int) GalleryModels { - start := (pageNum - 1) * itemsNum - end := start + itemsNum - if start > len(gm) { - start = len(gm) - } - if end > len(gm) { - end = len(gm) - } - return gm[start:end] -} diff --git a/core/gallery/request_test.go b/core/gallery/request_test.go index ed07f474..fb1b20d1 100644 --- a/core/gallery/request_test.go +++ b/core/gallery/request_test.go @@ -14,7 +14,7 @@ var _ = Describe("Gallery API tests", func() { URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main", }, } - e, err := GetGalleryConfigFromURL(req.URL, "") + e, err := GetGalleryConfigFromURL[ModelConfig](req.URL, "") Expect(err).ToNot(HaveOccurred()) Expect(e.Name).To(Equal("gpt4all-j")) }) diff --git a/core/http/app.go b/core/http/app.go index bce27397..ce8ce164 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -204,7 +204,7 @@ func API(application *application.Application) (*fiber.App, error) { utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles) - galleryService := services.NewGalleryService(application.ApplicationConfig()) + galleryService := services.NewGalleryService(application.ApplicationConfig(), application.ModelLoader()) galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader()) requestExtractor := middleware.NewRequestExtractor(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()) diff 
--git a/core/http/app_test.go b/core/http/app_test.go index cabdba39..d08f9dfa 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -485,29 +485,6 @@ var _ = Describe("API test", func() { Expect(err).ToNot(HaveOccurred()) Expect(content["backend"]).To(Equal("llama")) }) - It("apply models from config", func() { - response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml", - }) - - Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) - - uuid := response["uuid"].(string) - - Eventually(func() bool { - response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) - return response["processed"].(bool) - }, "900s", "10s").Should(Equal(true)) - - Eventually(func() []string { - models, _ := client.ListModels(context.TODO()) - modelList := []string{} - for _, m := range models.Models { - modelList = append(modelList, m.ID) - } - return modelList - }, "360s", "10s").Should(ContainElements("hermes-2-pro-mistral")) - }) It("apply models without overrides", func() { response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ URL: bertEmbeddingsURL, @@ -533,80 +510,6 @@ var _ = Describe("API test", func() { Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this")) }) - It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } - - modelName := "hermes-2-pro-mistral" - response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml", - }) - - Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) - - uuid := response["uuid"].(string) - - Eventually(func() bool { - response := 
getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) - return response["processed"].(bool) - }, "900s", "10s").Should(Equal(true)) - - By("testing chat") - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: modelName, Messages: []openai.ChatCompletionMessage{ - { - Role: "user", - Content: "How much is 2+2?", - }, - }}) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp.Choices)).To(Equal(1)) - Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four"))) - - By("testing functions") - resp2, err := client.CreateChatCompletion( - context.TODO(), - openai.ChatCompletionRequest{ - Model: modelName, - Messages: []openai.ChatCompletionMessage{ - { - Role: "user", - Content: "What is the weather like in San Francisco (celsius)?", - }, - }, - Functions: []openai.FunctionDefinition{ - openai.FunctionDefinition{ - Name: "get_current_weather", - Description: "Get the current weather", - Parameters: jsonschema.Definition{ - Type: jsonschema.Object, - Properties: map[string]jsonschema.Definition{ - "location": { - Type: jsonschema.String, - Description: "The city and state, e.g. 
San Francisco, CA", - }, - "unit": { - Type: jsonschema.String, - Enum: []string{"celcius", "fahrenheit"}, - }, - }, - Required: []string{"location"}, - }, - }, - }, - }) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp2.Choices)).To(Equal(1)) - Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil()) - Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name) - - var res map[string]string - err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res) - Expect(err).ToNot(HaveOccurred()) - Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res)) - Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) - Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason)) - }) }) }) @@ -673,6 +576,82 @@ var _ = Describe("API test", func() { _, err = os.ReadDir(tmpdir) Expect(err).To(HaveOccurred()) }) + + It("runs gguf models (chat)", Label("llama-gguf"), func() { + if runtime.GOOS != "linux" { + Skip("test supported only on linux") + } + + modelName := "qwen3-1.7b" + response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ + ID: "localai@" + modelName, + }) + + Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) + + uuid := response["uuid"].(string) + + Eventually(func() bool { + response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) + return response["processed"].(bool) + }, "900s", "10s").Should(Equal(true)) + + By("testing chat") + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: modelName, Messages: []openai.ChatCompletionMessage{ + { + Role: "user", + Content: "How much is 2+2?", + }, + }}) + Expect(err).ToNot(HaveOccurred()) + Expect(len(resp.Choices)).To(Equal(1)) + Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four"))) + + By("testing 
functions") + resp2, err := client.CreateChatCompletion( + context.TODO(), + openai.ChatCompletionRequest{ + Model: modelName, + Messages: []openai.ChatCompletionMessage{ + { + Role: "user", + Content: "What is the weather like in San Francisco (celsius)?", + }, + }, + Functions: []openai.FunctionDefinition{ + openai.FunctionDefinition{ + Name: "get_current_weather", + Description: "Get the current weather", + Parameters: jsonschema.Definition{ + Type: jsonschema.Object, + Properties: map[string]jsonschema.Definition{ + "location": { + Type: jsonschema.String, + Description: "The city and state, e.g. San Francisco, CA", + }, + "unit": { + Type: jsonschema.String, + Enum: []string{"celcius", "fahrenheit"}, + }, + }, + Required: []string{"location"}, + }, + }, + }, + }) + Expect(err).ToNot(HaveOccurred()) + Expect(len(resp2.Choices)).To(Equal(1)) + Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil()) + Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name) + + var res map[string]string + err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res) + Expect(err).ToNot(HaveOccurred()) + Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res)) + Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) + Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason)) + }) + It("installs and is capable to run tts", Label("tts"), func() { if runtime.GOOS != "linux" { Skip("test supported only on linux") diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index 589604cd..02fc6ca5 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -331,7 +331,7 @@ func modelActionItems(m *gallery.GalleryModel, processTracker ProcessTracker, ga elem.If( currentlyProcessing, elem.Node( // If currently installing, show progress bar - elem.Raw(StartProgressBar(jobID, 
"0", progressMessage)), + elem.Raw(StartModelProgressBar(jobID, "0", progressMessage)), ), // Otherwise, show install button (if not installed) or display "Installed" elem.If(m.Installed, elem.Node(elem.Div( @@ -418,3 +418,335 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g return wrapper.Render() } + +func ListBackends(backends []*gallery.GalleryBackend, processTracker ProcessTracker, galleryService *services.GalleryService) string { + backendsElements := []elem.Node{} + + for _, b := range backends { + elems := []elem.Node{} + + if b.Icon == "" { + b.Icon = noImage + } + + divProperties := attrs.Props{ + "class": "flex justify-center items-center", + } + + elems = append(elems, + elem.Div(divProperties, + elem.A(attrs.Props{ + "href": "#!", + }, + elem.Img(attrs.Props{ + "class": "rounded-t-lg max-h-48 max-w-96 object-cover mt-3", + "src": b.Icon, + "loading": "lazy", + }), + ), + ), + ) + + elems = append(elems, + backendDescription(b), + backendActionItems(b, processTracker, galleryService), + ) + backendsElements = append(backendsElements, + elem.Div( + attrs.Props{ + "class": "me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2 bg-gray-800/90 border border-gray-700/50 rounded-xl overflow-hidden transition-all duration-300 hover:shadow-lg hover:shadow-blue-900/20 hover:-translate-y-1 hover:border-blue-700/50", + }, + elem.Div( + attrs.Props{}, + elems..., + ), + ), + backendModal(b), + ) + } + + wrapper := elem.Div(attrs.Props{ + "class": "dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark", + }, backendsElements...) 
+ + return wrapper.Render() +} + +func backendDescription(b *gallery.GalleryBackend) elem.Node { + return elem.Div( + attrs.Props{ + "class": "p-6 text-surface dark:text-white", + }, + elem.H5( + attrs.Props{ + "class": "mb-2 text-xl font-bold leading-tight", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(b.Name)), + ), + elem.Div( + attrs.Props{ + "class": "mb-4 text-sm truncate text-base", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(b.Description)), + ), + ) +} + +func backendActionItems(b *gallery.GalleryBackend, processTracker ProcessTracker, galleryService *services.GalleryService) elem.Node { + galleryID := fmt.Sprintf("%s@%s", b.Gallery.Name, b.Name) + currentlyProcessing := processTracker.Exists(galleryID) + jobID := "" + isDeletionOp := false + if currentlyProcessing { + status := galleryService.GetStatus(galleryID) + if status != nil && status.Deletion { + isDeletionOp = true + } + jobID = processTracker.Get(galleryID) + } + + nodes := []elem.Node{ + cardSpan("Repository: "+b.Gallery.Name, "fa-brands fa-git-alt"), + } + + if b.License != "" { + nodes = append(nodes, + cardSpan("License: "+b.License, "fas fa-book"), + ) + } + + progressMessage := "Installation" + if isDeletionOp { + progressMessage = "Deletion" + } + + return elem.Div( + attrs.Props{ + "class": "px-6 pt-4 pb-2", + }, + elem.P( + attrs.Props{ + "class": "mb-4 text-base", + }, + nodes..., + ), + elem.Div( + attrs.Props{ + "id": "action-div-" + dropBadChars(galleryID), + "class": "flow-root", + }, + backendInfoButton(b), + elem.Div( + attrs.Props{ + "class": "float-right", + }, + elem.If( + currentlyProcessing, + elem.Node( + elem.Raw(StartModelProgressBar(jobID, "0", progressMessage)), + ), + elem.If(b.Installed, + elem.Node(elem.Div( + attrs.Props{}, + backendReInstallButton(galleryID), + backendDeleteButton(galleryID), + )), + backendInstallButton(galleryID), + ), + ), + ), + ), + ) +} + +func backendModal(b *gallery.GalleryBackend) elem.Node { + urls := []elem.Node{} + for _, 
url := range b.URLs { + urls = append(urls, + elem.Li(attrs.Props{}, link(url, url)), + ) + } + + tagsNodes := []elem.Node{} + for _, tag := range b.Tags { + tagsNodes = append(tagsNodes, + searchableElement(tag, "fas fa-tag"), + ) + } + + modalID := fmt.Sprintf("modal-%s", dropBadChars(fmt.Sprintf("%s@%s", b.Gallery.Name, b.Name))) + + return elem.Div( + attrs.Props{ + "id": modalID, + "tabindex": "-1", + "aria-hidden": "true", + "class": "hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-full max-h-full bg-gray-900/50", + }, + elem.Div( + attrs.Props{ + "class": "relative p-4 w-full max-w-2xl h-[90vh] mx-auto mt-[5vh]", + }, + elem.Div( + attrs.Props{ + "class": "relative bg-white rounded-lg shadow dark:bg-gray-700 h-full flex flex-col", + }, + elem.Div( + attrs.Props{ + "class": "flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600", + }, + elem.H3( + attrs.Props{ + "class": "text-xl font-semibold text-gray-900 dark:text-white", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(b.Name)), + ), + elem.Button( + attrs.Props{ + "class": "text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white", + "data-modal-hide": modalID, + }, + elem.Raw( + ``, + ), + elem.Span( + attrs.Props{ + "class": "sr-only", + }, + elem.Text("Close modal"), + ), + ), + ), + elem.Div( + attrs.Props{ + "class": "p-4 md:p-5 space-y-4 overflow-y-auto flex-grow", + }, + elem.Div( + attrs.Props{ + "class": "flex justify-center items-center", + }, + elem.Img(attrs.Props{ + "src": b.Icon, + "class": "rounded-t-lg max-h-48 max-w-96 object-cover mt-3", + "loading": "lazy", + }), + ), + elem.P( + attrs.Props{ + "class": "text-base leading-relaxed text-gray-500 dark:text-gray-400", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(b.Description)), + ), + elem.Div( + attrs.Props{ + "class": "flex 
flex-wrap gap-2", + }, + tagsNodes..., + ), + elem.Div( + attrs.Props{ + "class": "text-base leading-relaxed text-gray-500 dark:text-gray-400", + }, + elem.Ul(attrs.Props{}, urls...), + ), + ), + elem.Div( + attrs.Props{ + "class": "flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600", + }, + elem.Button( + attrs.Props{ + "data-modal-hide": modalID, + "type": "button", + "class": "text-white bg-blue-700 hover:bg-blue-800 focus:ring-4 focus:outline-none focus:ring-blue-300 font-medium rounded-lg text-sm px-5 py-2.5 text-center dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800", + }, + elem.Text("Close"), + ), + ), + ), + ), + ) +} + +func backendInfoButton(b *gallery.GalleryBackend) elem.Node { + modalID := fmt.Sprintf("modal-%s", dropBadChars(fmt.Sprintf("%s@%s", b.Gallery.Name, b.Name))) + return elem.Button( + attrs.Props{ + "data-twe-ripple-init": "", + "data-twe-ripple-color": "light", + "class": "inline-flex items-center rounded-lg bg-gray-700 hover:bg-gray-600 px-4 py-2 text-sm font-medium text-white transition duration-300 ease-in-out", + "data-modal-target": modalID, + "data-modal-toggle": modalID, + }, + elem.P( + attrs.Props{ + "class": "flex items-center", + }, + elem.I( + attrs.Props{ + "class": "fas fa-info-circle pr-2", + }, + ), + elem.Text("Info"), + ), + ) +} + +func backendInstallButton(galleryID string) elem.Node { + return elem.Button( + attrs.Props{ + "data-twe-ripple-init": "", + "data-twe-ripple-color": "light", + "class": "float-right inline-flex items-center rounded-lg bg-blue-600 hover:bg-blue-700 px-4 py-2 text-sm font-medium text-white transition duration-300 ease-in-out shadow hover:shadow-lg", + "hx-swap": "outerHTML", + "hx-post": "browse/install/backend/" + galleryID, + }, + elem.I( + attrs.Props{ + "class": "fa-solid fa-download pr-2", + }, + ), + elem.Text("Install"), + ) +} + +func backendReInstallButton(galleryID string) elem.Node { + return elem.Button( + attrs.Props{ + 
"data-twe-ripple-init": "", + "data-twe-ripple-color": "light", + "class": "float-right inline-block rounded bg-primary ml-2 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "hx-target": "#action-div-" + dropBadChars(galleryID), + "hx-swap": "outerHTML", + "hx-post": "browse/install/backend/" + galleryID, + }, + elem.I( + attrs.Props{ + "class": "fa-solid fa-arrow-rotate-right pr-2", + }, + ), + elem.Text("Reinstall"), + ) +} + +func backendDeleteButton(galleryID string) elem.Node { + return elem.Button( + attrs.Props{ + "data-twe-ripple-init": "", + "data-twe-ripple-color": "light", + "hx-confirm": "Are you sure you wish to delete the backend?", + "class": "float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "hx-target": "#action-div-" + dropBadChars(galleryID), + "hx-swap": "outerHTML", + "hx-post": "browse/delete/backend/" + galleryID, + }, + elem.I( + attrs.Props{ + "class": "fa-solid fa-cancel pr-2", + }, + ), + elem.Text("Delete"), + ) +} diff --git a/core/http/elements/progressbar.go b/core/http/elements/progressbar.go index 7dc340b2..64c806fe 100644 --- a/core/http/elements/progressbar.go +++ b/core/http/elements/progressbar.go @@ -6,7 +6,7 @@ import ( 
"github.com/microcosm-cc/bluemonday" ) -func DoneProgress(galleryID, text string, showDelete bool) string { +func DoneModelProgress(galleryID, text string, showDelete bool) string { return elem.Div( attrs.Props{ "id": "action-div-" + dropBadChars(galleryID), @@ -24,6 +24,24 @@ func DoneProgress(galleryID, text string, showDelete bool) string { ).Render() } +func DoneBackendProgress(galleryID, text string, showDelete bool) string { + return elem.Div( + attrs.Props{ + "id": "action-div-" + dropBadChars(galleryID), + }, + elem.H3( + attrs.Props{ + "role": "status", + "id": "pblabel", + "tabindex": "-1", + "autofocus": "", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(text)), + ), + elem.If(showDelete, backendDeleteButton(galleryID), reInstallButton(galleryID)), + ).Render() +} + func ErrorProgress(err, galleryName string) string { return elem.Div( attrs.Props{}, @@ -57,14 +75,22 @@ func ProgressBar(progress string) string { ).Render() } -func StartProgressBar(uid, progress, text string) string { +func StartModelProgressBar(uid, progress, text string) string { + return progressBar(uid, "browse/job/", progress, text) +} + +func StartBackendProgressBar(uid, progress, text string) string { + return progressBar(uid, "browse/backend/job/", progress, text) +} + +func progressBar(uid, url, progress, text string) string { if progress == "" { progress = "0" } return elem.Div( attrs.Props{ "hx-trigger": "done", - "hx-get": "browse/job/" + uid, + "hx-get": url + uid, "hx-swap": "outerHTML", "hx-target": "this", }, @@ -77,7 +103,7 @@ func StartProgressBar(uid, progress, text string) string { }, elem.Text(bluemonday.StrictPolicy().Sanitize(text)), //Perhaps overly defensive elem.Div(attrs.Props{ - "hx-get": "browse/job/progress/" + uid, + "hx-get": url + "progress/" + uid, "hx-trigger": "every 600ms", "hx-target": "this", "hx-swap": "innerHTML", diff --git a/core/http/endpoints/localai/backend.go b/core/http/endpoints/localai/backend.go new file mode 100644 index 
00000000..bcadde69 --- /dev/null +++ b/core/http/endpoints/localai/backend.go @@ -0,0 +1,152 @@ +package localai + +import ( + "encoding/json" + "fmt" + + "github.com/gofiber/fiber/v2" + "github.com/google/uuid" + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/http/utils" + "github.com/mudler/LocalAI/core/schema" + "github.com/mudler/LocalAI/core/services" + "github.com/rs/zerolog/log" +) + +type BackendEndpointService struct { + galleries []config.Gallery + backendPath string + backendApplier *services.GalleryService +} + +type GalleryBackend struct { + ID string `json:"id"` +} + +func CreateBackendEndpointService(galleries []config.Gallery, backendPath string, backendApplier *services.GalleryService) BackendEndpointService { + return BackendEndpointService{ + galleries: galleries, + backendPath: backendPath, + backendApplier: backendApplier, + } +} + +// GetOpStatusEndpoint returns the job status +// @Summary Returns the job status +// @Success 200 {object} services.BackendOpStatus "Response" +// @Router /backends/jobs/{uuid} [get] +func (mgs *BackendEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + status := mgs.backendApplier.GetStatus(c.Params("uuid")) + if status == nil { + return fmt.Errorf("could not find any status for ID") + } + return c.JSON(status) + } +} + +// GetAllStatusEndpoint returns all the jobs status progress +// @Summary Returns all the jobs status progress +// @Success 200 {object} map[string]services.BackendOpStatus "Response" +// @Router /backends/jobs [get] +func (mgs *BackendEndpointService) GetAllStatusEndpoint() func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + return c.JSON(mgs.backendApplier.GetAllStatus()) + } +} + +// ApplyBackendEndpoint installs a new backend to a LocalAI instance +// @Summary Install backends to LocalAI. 
+// @Param request body GalleryBackend true "query params" +// @Success 200 {object} schema.BackendResponse "Response" +// @Router /backends/apply [post] +func (mgs *BackendEndpointService) ApplyBackendEndpoint() func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + input := new(GalleryBackend) + // Get input data from the request body + if err := c.BodyParser(input); err != nil { + return err + } + + uuid, err := uuid.NewUUID() + if err != nil { + return err + } + mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend]{ + ID: uuid.String(), + GalleryElementName: input.ID, + Galleries: mgs.galleries, + } + + return c.JSON(schema.BackendResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%sbackends/jobs/%s", utils.BaseURL(c), uuid.String())}) + } +} + +// DeleteBackendEndpoint lets delete backends from a LocalAI instance +// @Summary delete backends from LocalAI. +// @Param name path string true "Backend name" +// @Success 200 {object} schema.BackendResponse "Response" +// @Router /backends/delete/{name} [post] +func (mgs *BackendEndpointService) DeleteBackendEndpoint() func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + backendName := c.Params("name") + + mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend]{ + Delete: true, + GalleryElementName: backendName, + Galleries: mgs.galleries, + } + + uuid, err := uuid.NewUUID() + if err != nil { + return err + } + + return c.JSON(schema.BackendResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%sbackends/jobs/%s", utils.BaseURL(c), uuid.String())}) + } +} + +// ListBackendsEndpoint list the available backends configured in LocalAI +// @Summary List all Backends +// @Success 200 {object} []gallery.GalleryBackend "Response" +// @Router /backends [get] +func (mgs *BackendEndpointService) ListBackendsEndpoint() func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + backends, err := gallery.ListSystemBackends(mgs.backendPath) + if 
err != nil { + return err + } + return c.JSON(backends) + } +} + +// ListBackendGalleriesEndpoint list the available backend galleries configured in LocalAI +// @Summary List all Galleries +// @Success 200 {object} []config.Gallery "Response" +// @Router /backends/galleries [get] +// NOTE: This is different (and much simpler!) than above! This JUST lists the backend galleries that have been loaded, not their contents! +func (mgs *BackendEndpointService) ListBackendGalleriesEndpoint() func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + log.Debug().Msgf("Listing backend galleries %+v", mgs.galleries) + dat, err := json.Marshal(mgs.galleries) + if err != nil { + return err + } + return c.Send(dat) + } +} + +// ListAvailableBackendsEndpoint list the available backends in the galleries configured in LocalAI +// @Summary List all available Backends +// @Success 200 {object} []gallery.GalleryBackend "Response" +// @Router /backends/available [get] +func (mgs *BackendEndpointService) ListAvailableBackendsEndpoint() func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + backends, err := gallery.AvailableBackends(mgs.galleries, mgs.backendPath) + if err != nil { + return err + } + return c.JSON(backends) + } +} diff --git a/core/http/endpoints/localai/gallery.go b/core/http/endpoints/localai/gallery.go index c2710991..e75e74b7 100644 --- a/core/http/endpoints/localai/gallery.go +++ b/core/http/endpoints/localai/gallery.go @@ -3,7 +3,6 @@ package localai import ( "encoding/json" "fmt" - "slices" "github.com/gofiber/fiber/v2" "github.com/google/uuid" @@ -22,8 +21,7 @@ type ModelGalleryEndpointService struct { } type GalleryModel struct { - ID string `json:"id"` - ConfigURL string `json:"config_url"` + ID string `json:"id"` gallery.GalleryModel } @@ -37,7 +35,7 @@ func CreateModelGalleryEndpointService(galleries []config.Gallery, modelPath str // GetOpStatusEndpoint returns the job status // @Summary Returns the job status -// @Success 200 {object} 
gallery.GalleryOpStatus "Response" +// @Success 200 {object} services.GalleryOpStatus "Response" // @Router /models/jobs/{uuid} [get] func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { @@ -51,7 +49,7 @@ func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx) // GetAllStatusEndpoint returns all the jobs status progress // @Summary Returns all the jobs status progress -// @Success 200 {object} map[string]gallery.GalleryOpStatus "Response" +// @Success 200 {object} map[string]services.GalleryOpStatus "Response" // @Router /models/jobs [get] func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { @@ -76,12 +74,11 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe if err != nil { return err } - mgs.galleryApplier.C <- gallery.GalleryOp{ - Req: input.GalleryModel, - Id: uuid.String(), - GalleryModelName: input.ID, - Galleries: mgs.galleries, - ConfigURL: input.ConfigURL, + mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{ + Req: input.GalleryModel, + ID: uuid.String(), + GalleryElementName: input.ID, + Galleries: mgs.galleries, } return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%smodels/jobs/%s", utils.BaseURL(c), uuid.String())}) @@ -97,9 +94,9 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib return func(c *fiber.Ctx) error { modelName := c.Params("name") - mgs.galleryApplier.C <- gallery.GalleryOp{ - Delete: true, - GalleryModelName: modelName, + mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{ + Delete: true, + GalleryElementName: modelName, } uuid, err := uuid.NewUUID() @@ -157,58 +154,3 @@ func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fib return c.Send(dat) } } - -// AddModelGalleryEndpoint adds a gallery in 
LocalAI -// @Summary Adds a gallery in LocalAI -// @Param request body config.Gallery true "Gallery details" -// @Success 200 {object} []config.Gallery "Response" -// @Router /models/galleries [post] -func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.Ctx) error { - return func(c *fiber.Ctx) error { - input := new(config.Gallery) - // Get input data from the request body - if err := c.BodyParser(input); err != nil { - return err - } - if slices.ContainsFunc(mgs.galleries, func(gallery config.Gallery) bool { - return gallery.Name == input.Name - }) { - return fmt.Errorf("%s already exists", input.Name) - } - dat, err := json.Marshal(mgs.galleries) - if err != nil { - return err - } - log.Debug().Msgf("Adding %+v to gallery list", *input) - mgs.galleries = append(mgs.galleries, *input) - return c.Send(dat) - } -} - -// RemoveModelGalleryEndpoint remove a gallery in LocalAI -// @Summary removes a gallery from LocalAI -// @Param request body config.Gallery true "Gallery details" -// @Success 200 {object} []config.Gallery "Response" -// @Router /models/galleries [delete] -func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fiber.Ctx) error { - return func(c *fiber.Ctx) error { - input := new(config.Gallery) - // Get input data from the request body - if err := c.BodyParser(input); err != nil { - return err - } - if !slices.ContainsFunc(mgs.galleries, func(gallery config.Gallery) bool { - return gallery.Name == input.Name - }) { - return fmt.Errorf("%s is not currently registered", input.Name) - } - mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery config.Gallery) bool { - return gallery.Name == input.Name - }) - dat, err := json.Marshal(mgs.galleries) - if err != nil { - return err - } - return c.Send(dat) - } -} diff --git a/core/http/endpoints/localai/system.go b/core/http/endpoints/localai/system.go index 92d80a3a..ea01a670 100644 --- a/core/http/endpoints/localai/system.go +++ 
b/core/http/endpoints/localai/system.go @@ -21,6 +21,9 @@ func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConf for b := range appConfig.ExternalGRPCBackends { availableBackends = append(availableBackends, b) } + for b := range ml.GetAllExternalBackends(nil) { + availableBackends = append(availableBackends, b) + } sysmodels := []schema.SysInfoModel{} for _, m := range loadedModels { diff --git a/core/http/endpoints/localai/welcome.go b/core/http/endpoints/localai/welcome.go index 57cf8809..07bc92c6 100644 --- a/core/http/endpoints/localai/welcome.go +++ b/core/http/endpoints/localai/welcome.go @@ -12,10 +12,10 @@ import ( ) func WelcomeEndpoint(appConfig *config.ApplicationConfig, - cl *config.BackendConfigLoader, ml *model.ModelLoader, modelStatus func() (map[string]string, map[string]string)) func(*fiber.Ctx) error { + cl *config.BackendConfigLoader, ml *model.ModelLoader, opcache *services.OpCache) func(*fiber.Ctx) error { return func(c *fiber.Ctx) error { backendConfigs := cl.GetAllBackendConfigs() - galleryConfigs := map[string]*gallery.Config{} + galleryConfigs := map[string]*gallery.ModelConfig{} for _, m := range backendConfigs { cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name) @@ -28,7 +28,7 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig, modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY) // Get model statuses to display in the UI the operation in progress - processingModels, taskTypes := modelStatus() + processingModels, taskTypes := opcache.GetStatus() summary := fiber.Map{ "Title": "LocalAI API - " + internal.PrintableVersion(), diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index e369a559..e0217be3 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -30,10 +30,16 @@ func RegisterLocalAIRoutes(router *fiber.App, router.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint()) 
router.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint()) - router.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint()) - router.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint()) router.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint()) router.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint()) + + backendGalleryEndpointService := localai.CreateBackendEndpointService(appConfig.BackendGalleries, appConfig.BackendsPath, galleryService) + router.Post("/backends/apply", backendGalleryEndpointService.ApplyBackendEndpoint()) + router.Post("/backends/delete/:name", backendGalleryEndpointService.DeleteBackendEndpoint()) + router.Get("/backends", backendGalleryEndpointService.ListBackendsEndpoint()) + router.Get("/backends/available", backendGalleryEndpointService.ListAvailableBackendsEndpoint()) + router.Get("/backends/galleries", backendGalleryEndpointService.ListBackendGalleriesEndpoint()) + router.Get("/backends/jobs/:uuid", backendGalleryEndpointService.GetOpStatusEndpoint()) } router.Post("/tts", diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index acbad0b6..6a59ad1a 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -1,13 +1,6 @@ package routes import ( - "fmt" - "html/template" - "math" - "sort" - "strconv" - "strings" - "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/http/elements" @@ -17,78 +10,20 @@ import ( "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/internal" "github.com/mudler/LocalAI/pkg/model" - "github.com/mudler/LocalAI/pkg/xsync" "github.com/gofiber/fiber/v2" - "github.com/google/uuid" - "github.com/microcosm-cc/bluemonday" - "github.com/rs/zerolog/log" ) -type modelOpCache struct { - status *xsync.SyncedMap[string, string] -} - -func NewModelOpCache() *modelOpCache { - return &modelOpCache{ - 
status: xsync.NewSyncedMap[string, string](), - } -} - -func (m *modelOpCache) Set(key string, value string) { - m.status.Set(key, value) -} - -func (m *modelOpCache) Get(key string) string { - return m.status.Get(key) -} - -func (m *modelOpCache) DeleteUUID(uuid string) { - for _, k := range m.status.Keys() { - if m.status.Get(k) == uuid { - m.status.Delete(k) - } - } -} - -func (m *modelOpCache) Map() map[string]string { - return m.status.Map() -} - -func (m *modelOpCache) Exists(key string) bool { - return m.status.Exists(key) -} - func RegisterUIRoutes(app *fiber.App, cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService) { - // keeps the state of models that are being installed from the UI - var processingModels = NewModelOpCache() + // keeps the state of ops that are started from the UI + var processingOps = services.NewOpCache(galleryService) - // modelStatus returns the current status of the models being processed (installation or deletion) - // it is called asynchronously from the UI - modelStatus := func() (map[string]string, map[string]string) { - processingModelsData := processingModels.Map() - - taskTypes := map[string]string{} - - for k, v := range processingModelsData { - status := galleryService.GetStatus(v) - taskTypes[k] = "Installation" - if status != nil && status.Deletion { - taskTypes[k] = "Deletion" - } else if status == nil { - taskTypes[k] = "Waiting" - } - } - - return processingModelsData, taskTypes - } - - app.Get("/", localai.WelcomeEndpoint(appConfig, cl, ml, modelStatus)) + app.Get("/", localai.WelcomeEndpoint(appConfig, cl, ml, processingOps)) if p2p.IsP2PEnabled() { app.Get("/p2p", func(c *fiber.Ctx) error { @@ -124,262 +59,8 @@ func RegisterUIRoutes(app *fiber.App, } if !appConfig.DisableGalleryEndpoint { - - // Show the Models page (all models) - app.Get("/browse", func(c *fiber.Ctx) error { - term := c.Query("term") - page := c.Query("page") - items := 
c.Query("items") - - models, err := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) - if err != nil { - log.Error().Err(err).Msg("could not list models from galleries") - return c.Status(fiber.StatusInternalServerError).Render("views/error", fiber.Map{ - "Title": "LocalAI - Models", - "BaseURL": utils.BaseURL(c), - "Version": internal.PrintableVersion(), - "ErrorCode": "500", - "ErrorMessage": err.Error(), - }) - } - - // Get all available tags - allTags := map[string]struct{}{} - tags := []string{} - for _, m := range models { - for _, t := range m.Tags { - allTags[t] = struct{}{} - } - } - for t := range allTags { - tags = append(tags, t) - } - sort.Strings(tags) - - if term != "" { - models = gallery.GalleryModels(models).Search(term) - } - - // Get model statuses - processingModelsData, taskTypes := modelStatus() - - summary := fiber.Map{ - "Title": "LocalAI - Models", - "BaseURL": utils.BaseURL(c), - "Version": internal.PrintableVersion(), - "Models": template.HTML(elements.ListModels(models, processingModels, galleryService)), - "Repositories": appConfig.Galleries, - "AllTags": tags, - "ProcessingModels": processingModelsData, - "AvailableModels": len(models), - "IsP2PEnabled": p2p.IsP2PEnabled(), - - "TaskTypes": taskTypes, - // "ApplicationConfig": appConfig, - } - - if page == "" { - page = "1" - } - - if page != "" { - // return a subset of the models - pageNum, err := strconv.Atoi(page) - if err != nil { - return c.Status(fiber.StatusBadRequest).SendString("Invalid page number") - } - - if pageNum == 0 { - return c.Render("views/models", summary) - } - - itemsNum, err := strconv.Atoi(items) - if err != nil { - itemsNum = 21 - } - - totalPages := int(math.Ceil(float64(len(models)) / float64(itemsNum))) - - models = models.Paginate(pageNum, itemsNum) - - prevPage := pageNum - 1 - nextPage := pageNum + 1 - if prevPage < 1 { - prevPage = 1 - } - if nextPage > totalPages { - nextPage = totalPages - } - if prevPage != pageNum { - 
summary["PrevPage"] = prevPage - } - summary["NextPage"] = nextPage - summary["TotalPages"] = totalPages - summary["CurrentPage"] = pageNum - summary["Models"] = template.HTML(elements.ListModels(models, processingModels, galleryService)) - } - - // Render index - return c.Render("views/models", summary) - }) - - // Show the models, filtered from the user input - // https://htmx.org/examples/active-search/ - app.Post("/browse/search/models", func(c *fiber.Ctx) error { - page := c.Query("page") - items := c.Query("items") - - form := struct { - Search string `form:"search"` - }{} - if err := c.BodyParser(&form); err != nil { - return c.Status(fiber.StatusBadRequest).SendString(bluemonday.StrictPolicy().Sanitize(err.Error())) - } - - models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) - - if page != "" { - // return a subset of the models - pageNum, err := strconv.Atoi(page) - if err != nil { - return c.Status(fiber.StatusBadRequest).SendString("Invalid page number") - } - - itemsNum, err := strconv.Atoi(items) - if err != nil { - itemsNum = 21 - } - - models = models.Paginate(pageNum, itemsNum) - } - - if form.Search != "" { - models = models.Search(form.Search) - } - - return c.SendString(elements.ListModels(models, processingModels, galleryService)) - }) - - /* - - Install routes - - */ - - // This route is used when the "Install" button is pressed, we submit here a new job to the gallery service - // https://htmx.org/examples/progress-bar/ - app.Post("/browse/install/model/:id", func(c *fiber.Ctx) error { - galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! 
- log.Debug().Msgf("UI job submitted to install : %+v\n", galleryID) - - id, err := uuid.NewUUID() - if err != nil { - return err - } - - uid := id.String() - - processingModels.Set(galleryID, uid) - - op := gallery.GalleryOp{ - Id: uid, - GalleryModelName: galleryID, - Galleries: appConfig.Galleries, - } - go func() { - galleryService.C <- op - }() - - return c.SendString(elements.StartProgressBar(uid, "0", "Installation")) - }) - - // This route is used when the "Install" button is pressed, we submit here a new job to the gallery service - // https://htmx.org/examples/progress-bar/ - app.Post("/browse/delete/model/:id", func(c *fiber.Ctx) error { - galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! - log.Debug().Msgf("UI job submitted to delete : %+v\n", galleryID) - var galleryName = galleryID - if strings.Contains(galleryID, "@") { - // if the galleryID contains a @ it means that it's a model from a gallery - // but we want to delete it from the local models which does not need - // a repository ID - galleryName = strings.Split(galleryID, "@")[1] - } - - id, err := uuid.NewUUID() - if err != nil { - return err - } - - uid := id.String() - - // Track the deletion job by galleryID and galleryName - // The GalleryID contains information about the repository, - // while the GalleryName is ONLY the name of the model - processingModels.Set(galleryName, uid) - processingModels.Set(galleryID, uid) - - op := gallery.GalleryOp{ - Id: uid, - Delete: true, - GalleryModelName: galleryName, - } - go func() { - galleryService.C <- op - cl.RemoveBackendConfig(galleryName) - }() - - return c.SendString(elements.StartProgressBar(uid, "0", "Deletion")) - }) - - // Display the job current progress status - // If the job is done, we trigger the /browse/job/:uid route - // https://htmx.org/examples/progress-bar/ - app.Get("/browse/job/progress/:uid", func(c *fiber.Ctx) error { - jobUID := strings.Clone(c.Params("uid")) // note: 
strings.Clone is required for multiple requests! - - status := galleryService.GetStatus(jobUID) - if status == nil { - //fmt.Errorf("could not find any status for ID") - return c.SendString(elements.ProgressBar("0")) - } - - if status.Progress == 100 { - c.Set("HX-Trigger", "done") // this triggers /browse/job/:uid (which is when the job is done) - return c.SendString(elements.ProgressBar("100")) - } - if status.Error != nil { - // TODO: instead of deleting the job, we should keep it in the cache and make it dismissable by the user - processingModels.DeleteUUID(jobUID) - return c.SendString(elements.ErrorProgress(status.Error.Error(), status.GalleryModelName)) - } - - return c.SendString(elements.ProgressBar(fmt.Sprint(status.Progress))) - }) - - // this route is hit when the job is done, and we display the - // final state (for now just displays "Installation completed") - app.Get("/browse/job/:uid", func(c *fiber.Ctx) error { - jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests! 
- - status := galleryService.GetStatus(jobUID) - - galleryID := "" - processingModels.DeleteUUID(jobUID) - if galleryID == "" { - log.Debug().Msgf("no processing model found for job : %+v\n", jobUID) - } - - log.Debug().Msgf("JOB finished : %+v\n", status) - showDelete := true - displayText := "Installation completed" - if status.Deletion { - showDelete = false - displayText = "Deletion completed" - } - - return c.SendString(elements.DoneProgress(galleryID, displayText, showDelete)) - }) + registerGalleryRoutes(app, cl, appConfig, galleryService, processingOps) + registerBackendGalleryRoutes(app, appConfig, galleryService, processingOps) } app.Get("/talk/", func(c *fiber.Ctx) error { @@ -412,7 +93,7 @@ func RegisterUIRoutes(app *fiber.App, return c.Redirect(utils.BaseURL(c)) } modelThatCanBeUsed := "" - galleryConfigs := map[string]*gallery.Config{} + galleryConfigs := map[string]*gallery.ModelConfig{} for _, m := range backendConfigs { cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name) @@ -452,7 +133,7 @@ func RegisterUIRoutes(app *fiber.App, backendConfigs := cl.GetAllBackendConfigs() modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY) - galleryConfigs := map[string]*gallery.Config{} + galleryConfigs := map[string]*gallery.ModelConfig{} for _, m := range backendConfigs { cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name) diff --git a/core/http/routes/ui_backend_gallery.go b/core/http/routes/ui_backend_gallery.go new file mode 100644 index 00000000..4dcc657b --- /dev/null +++ b/core/http/routes/ui_backend_gallery.go @@ -0,0 +1,258 @@ +package routes + +import ( + "fmt" + "html/template" + "math" + "sort" + "strconv" + "strings" + + "github.com/gofiber/fiber/v2" + "github.com/google/uuid" + "github.com/microcosm-cc/bluemonday" + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/http/elements" + 
"github.com/mudler/LocalAI/core/http/utils" + "github.com/mudler/LocalAI/core/services" + "github.com/mudler/LocalAI/internal" + "github.com/rs/zerolog/log" +) + +func registerBackendGalleryRoutes(app *fiber.App, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache) { + + // Show the Backends page (all backends) + app.Get("/browse/backends", func(c *fiber.Ctx) error { + term := c.Query("term") + page := c.Query("page") + items := c.Query("items") + + backends, err := gallery.AvailableBackends(appConfig.BackendGalleries, appConfig.BackendsPath) + if err != nil { + log.Error().Err(err).Msg("could not list backends from galleries") + return c.Status(fiber.StatusInternalServerError).Render("views/error", fiber.Map{ + "Title": "LocalAI - Backends", + "BaseURL": utils.BaseURL(c), + "Version": internal.PrintableVersion(), + "ErrorCode": "500", + "ErrorMessage": err.Error(), + }) + } + + // Get all available tags + allTags := map[string]struct{}{} + tags := []string{} + for _, b := range backends { + for _, t := range b.Tags { + allTags[t] = struct{}{} + } + } + for t := range allTags { + tags = append(tags, t) + } + sort.Strings(tags) + + if term != "" { + backends = gallery.GalleryElements[*gallery.GalleryBackend](backends).Search(term) + } + + // Get backend statuses + processingBackendsData, taskTypes := opcache.GetStatus() + + summary := fiber.Map{ + "Title": "LocalAI - Backends", + "BaseURL": utils.BaseURL(c), + "Version": internal.PrintableVersion(), + "Backends": template.HTML(elements.ListBackends(backends, opcache, galleryService)), + "Repositories": appConfig.BackendGalleries, + "AllTags": tags, + "ProcessingBackends": processingBackendsData, + "AvailableBackends": len(backends), + "TaskTypes": taskTypes, + } + + if page == "" { + page = "1" + } + + if page != "" { + // return a subset of the backends + pageNum, err := strconv.Atoi(page) + if err != nil { + return 
c.Status(fiber.StatusBadRequest).SendString("Invalid page number") + } + + if pageNum == 0 { + return c.Render("views/backends", summary) + } + + itemsNum, err := strconv.Atoi(items) + if err != nil { + itemsNum = 21 + } + + totalPages := int(math.Ceil(float64(len(backends)) / float64(itemsNum))) + + backends = backends.Paginate(pageNum, itemsNum) + + prevPage := pageNum - 1 + nextPage := pageNum + 1 + if prevPage < 1 { + prevPage = 1 + } + if nextPage > totalPages { + nextPage = totalPages + } + if prevPage != pageNum { + summary["PrevPage"] = prevPage + } + summary["NextPage"] = nextPage + summary["TotalPages"] = totalPages + summary["CurrentPage"] = pageNum + summary["Backends"] = template.HTML(elements.ListBackends(backends, opcache, galleryService)) + } + + // Render index + return c.Render("views/backends", summary) + }) + + // Show the backends, filtered from the user input + app.Post("/browse/search/backends", func(c *fiber.Ctx) error { + page := c.Query("page") + items := c.Query("items") + + form := struct { + Search string `form:"search"` + }{} + if err := c.BodyParser(&form); err != nil { + return c.Status(fiber.StatusBadRequest).SendString(bluemonday.StrictPolicy().Sanitize(err.Error())) + } + + backends, _ := gallery.AvailableBackends(appConfig.BackendGalleries, appConfig.BackendsPath) + + if page != "" { + // return a subset of the backends + pageNum, err := strconv.Atoi(page) + if err != nil { + return c.Status(fiber.StatusBadRequest).SendString("Invalid page number") + } + + itemsNum, err := strconv.Atoi(items) + if err != nil { + itemsNum = 21 + } + + backends = backends.Paginate(pageNum, itemsNum) + } + + if form.Search != "" { + backends = backends.Search(form.Search) + } + + return c.SendString(elements.ListBackends(backends, opcache, galleryService)) + }) + + // Install backend route + app.Post("/browse/install/backend/:id", func(c *fiber.Ctx) error { + backendID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple 
requests! + log.Debug().Msgf("UI job submitted to install backend: %+v\n", backendID) + + id, err := uuid.NewUUID() + if err != nil { + return err + } + + uid := id.String() + + opcache.Set(backendID, uid) + + op := services.GalleryOp[gallery.GalleryBackend]{ + ID: uid, + GalleryElementName: backendID, + Galleries: appConfig.BackendGalleries, + } + go func() { + galleryService.BackendGalleryChannel <- op + }() + + return c.SendString(elements.StartBackendProgressBar(uid, "0", "Backend Installation")) + }) + + // Delete backend route + app.Post("/browse/delete/backend/:id", func(c *fiber.Ctx) error { + backendID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! + log.Debug().Msgf("UI job submitted to delete backend: %+v\n", backendID) + var backendName = backendID + if strings.Contains(backendID, "@") { + // TODO: this is ugly workaround - we should handle this consistently across the codebase + backendName = strings.Split(backendID, "@")[1] + } + + id, err := uuid.NewUUID() + if err != nil { + return err + } + + uid := id.String() + + opcache.Set(backendName, uid) + opcache.Set(backendID, uid) + + op := services.GalleryOp[gallery.GalleryBackend]{ + ID: uid, + Delete: true, + GalleryElementName: backendName, + Galleries: appConfig.BackendGalleries, + } + go func() { + galleryService.BackendGalleryChannel <- op + }() + + return c.SendString(elements.StartBackendProgressBar(uid, "0", "Backend Deletion")) + }) + + // Display the job current progress status + app.Get("/browse/backend/job/progress/:uid", func(c *fiber.Ctx) error { + jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests! 
+ + status := galleryService.GetStatus(jobUID) + if status == nil { + return c.SendString(elements.ProgressBar("0")) + } + + if status.Progress == 100 { + c.Set("HX-Trigger", "done") // this triggers /browse/backend/job/:uid + return c.SendString(elements.ProgressBar("100")) + } + if status.Error != nil { + opcache.DeleteUUID(jobUID) + return c.SendString(elements.ErrorProgress(status.Error.Error(), status.GalleryElementName)) + } + + return c.SendString(elements.ProgressBar(fmt.Sprint(status.Progress))) + }) + + // Job completion route + app.Get("/browse/backend/job/:uid", func(c *fiber.Ctx) error { + jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests! + + status := galleryService.GetStatus(jobUID) + + backendID := status.GalleryElementName + opcache.DeleteUUID(jobUID) + if backendID == "" { + log.Debug().Msgf("no processing backend found for job: %+v\n", jobUID) + } + + log.Debug().Msgf("JOB finished: %+v\n", status) + showDelete := true + displayText := "Backend Installation completed" + if status.Deletion { + showDelete = false + displayText = "Backend Deletion completed" + } + + return c.SendString(elements.DoneBackendProgress(backendID, displayText, showDelete)) + }) +} diff --git a/core/http/routes/ui_gallery.go b/core/http/routes/ui_gallery.go new file mode 100644 index 00000000..1cc629ca --- /dev/null +++ b/core/http/routes/ui_gallery.go @@ -0,0 +1,282 @@ +package routes + +import ( + "fmt" + "html/template" + "math" + "sort" + "strconv" + "strings" + + "github.com/gofiber/fiber/v2" + "github.com/google/uuid" + "github.com/microcosm-cc/bluemonday" + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/http/elements" + "github.com/mudler/LocalAI/core/http/utils" + "github.com/mudler/LocalAI/core/p2p" + "github.com/mudler/LocalAI/core/services" + "github.com/mudler/LocalAI/internal" + "github.com/rs/zerolog/log" +) + +func registerGalleryRoutes(app 
*fiber.App, cl *config.BackendConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache) { + + // Show the Models page (all models) + app.Get("/browse", func(c *fiber.Ctx) error { + term := c.Query("term") + page := c.Query("page") + items := c.Query("items") + + models, err := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) + if err != nil { + log.Error().Err(err).Msg("could not list models from galleries") + return c.Status(fiber.StatusInternalServerError).Render("views/error", fiber.Map{ + "Title": "LocalAI - Models", + "BaseURL": utils.BaseURL(c), + "Version": internal.PrintableVersion(), + "ErrorCode": "500", + "ErrorMessage": err.Error(), + }) + } + + // Get all available tags + allTags := map[string]struct{}{} + tags := []string{} + for _, m := range models { + for _, t := range m.Tags { + allTags[t] = struct{}{} + } + } + for t := range allTags { + tags = append(tags, t) + } + sort.Strings(tags) + + if term != "" { + models = gallery.GalleryElements[*gallery.GalleryModel](models).Search(term) + } + + // Get model statuses + processingModelsData, taskTypes := opcache.GetStatus() + + summary := fiber.Map{ + "Title": "LocalAI - Models", + "BaseURL": utils.BaseURL(c), + "Version": internal.PrintableVersion(), + "Models": template.HTML(elements.ListModels(models, opcache, galleryService)), + "Repositories": appConfig.Galleries, + "AllTags": tags, + "ProcessingModels": processingModelsData, + "AvailableModels": len(models), + "IsP2PEnabled": p2p.IsP2PEnabled(), + + "TaskTypes": taskTypes, + // "ApplicationConfig": appConfig, + } + + if page == "" { + page = "1" + } + + if page != "" { + // return a subset of the models + pageNum, err := strconv.Atoi(page) + if err != nil { + return c.Status(fiber.StatusBadRequest).SendString("Invalid page number") + } + + if pageNum == 0 { + return c.Render("views/models", summary) + } + + itemsNum, err := strconv.Atoi(items) + if err != nil { + 
itemsNum = 21 + } + + totalPages := int(math.Ceil(float64(len(models)) / float64(itemsNum))) + + models = models.Paginate(pageNum, itemsNum) + + prevPage := pageNum - 1 + nextPage := pageNum + 1 + if prevPage < 1 { + prevPage = 1 + } + if nextPage > totalPages { + nextPage = totalPages + } + if prevPage != pageNum { + summary["PrevPage"] = prevPage + } + summary["NextPage"] = nextPage + summary["TotalPages"] = totalPages + summary["CurrentPage"] = pageNum + summary["Models"] = template.HTML(elements.ListModels(models, opcache, galleryService)) + } + + // Render index + return c.Render("views/models", summary) + }) + + // Show the models, filtered from the user input + // https://htmx.org/examples/active-search/ + app.Post("/browse/search/models", func(c *fiber.Ctx) error { + page := c.Query("page") + items := c.Query("items") + + form := struct { + Search string `form:"search"` + }{} + if err := c.BodyParser(&form); err != nil { + return c.Status(fiber.StatusBadRequest).SendString(bluemonday.StrictPolicy().Sanitize(err.Error())) + } + + models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) + + if page != "" { + // return a subset of the models + pageNum, err := strconv.Atoi(page) + if err != nil { + return c.Status(fiber.StatusBadRequest).SendString("Invalid page number") + } + + itemsNum, err := strconv.Atoi(items) + if err != nil { + itemsNum = 21 + } + + models = models.Paginate(pageNum, itemsNum) + } + + if form.Search != "" { + models = models.Search(form.Search) + } + + return c.SendString(elements.ListModels(models, opcache, galleryService)) + }) + + /* + + Install routes + + */ + + // This route is used when the "Install" button is pressed, we submit here a new job to the gallery service + // https://htmx.org/examples/progress-bar/ + app.Post("/browse/install/model/:id", func(c *fiber.Ctx) error { + galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! 
+ log.Debug().Msgf("UI job submitted to install : %+v\n", galleryID) + + id, err := uuid.NewUUID() + if err != nil { + return err + } + + uid := id.String() + + opcache.Set(galleryID, uid) + + op := services.GalleryOp[gallery.GalleryModel]{ + ID: uid, + GalleryElementName: galleryID, + Galleries: appConfig.Galleries, + } + go func() { + galleryService.ModelGalleryChannel <- op + }() + + return c.SendString(elements.StartModelProgressBar(uid, "0", "Installation")) + }) + + // This route is used when the "Install" button is pressed, we submit here a new job to the gallery service + // https://htmx.org/examples/progress-bar/ + app.Post("/browse/delete/model/:id", func(c *fiber.Ctx) error { + galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! + log.Debug().Msgf("UI job submitted to delete : %+v\n", galleryID) + var galleryName = galleryID + if strings.Contains(galleryID, "@") { + // if the galleryID contains a @ it means that it's a model from a gallery + // but we want to delete it from the local models which does not need + // a repository ID + galleryName = strings.Split(galleryID, "@")[1] + } + + id, err := uuid.NewUUID() + if err != nil { + return err + } + + uid := id.String() + + // Track the deletion job by galleryID and galleryName + // The GalleryID contains information about the repository, + // while the GalleryName is ONLY the name of the model + opcache.Set(galleryName, uid) + opcache.Set(galleryID, uid) + + op := services.GalleryOp[gallery.GalleryModel]{ + ID: uid, + Delete: true, + GalleryElementName: galleryName, + Galleries: appConfig.Galleries, + } + go func() { + galleryService.ModelGalleryChannel <- op + cl.RemoveBackendConfig(galleryName) + }() + + return c.SendString(elements.StartModelProgressBar(uid, "0", "Deletion")) + }) + + // Display the job current progress status + // If the job is done, we trigger the /browse/job/:uid route + // https://htmx.org/examples/progress-bar/ + 
app.Get("/browse/job/progress/:uid", func(c *fiber.Ctx) error { + jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests! + + status := galleryService.GetStatus(jobUID) + if status == nil { + //fmt.Errorf("could not find any status for ID") + return c.SendString(elements.ProgressBar("0")) + } + + if status.Progress == 100 { + c.Set("HX-Trigger", "done") // this triggers /browse/job/:uid (which is when the job is done) + return c.SendString(elements.ProgressBar("100")) + } + if status.Error != nil { + // TODO: instead of deleting the job, we should keep it in the cache and make it dismissable by the user + opcache.DeleteUUID(jobUID) + return c.SendString(elements.ErrorProgress(status.Error.Error(), status.GalleryElementName)) + } + + return c.SendString(elements.ProgressBar(fmt.Sprint(status.Progress))) + }) + + // this route is hit when the job is done, and we display the + // final state (for now just displays "Installation completed") + app.Get("/browse/job/:uid", func(c *fiber.Ctx) error { + jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests! + + status := galleryService.GetStatus(jobUID) + + galleryID := status.GalleryElementName + opcache.DeleteUUID(jobUID) + if galleryID == "" { + log.Debug().Msgf("no processing model found for job : %+v\n", jobUID) + } + + log.Debug().Msgf("JOB finished : %+v\n", status) + showDelete := true + displayText := "Installation completed" + if status.Deletion { + showDelete = false + displayText = "Deletion completed" + } + + return c.SendString(elements.DoneModelProgress(galleryID, displayText, showDelete)) + }) +} diff --git a/core/http/views/backends.html b/core/http/views/backends.html new file mode 100644 index 00000000..c08d0e90 --- /dev/null +++ b/core/http/views/backends.html @@ -0,0 +1,148 @@ + + +{{template "views/partials/head" .}} + + +
+ + {{template "views/partials/navbar" .}} + {{ $numBackendsPerPage := 21 }} +
+ + +
+
+

+ + Backend Management + +

+

+ {{.AvailableBackends}} backends available + + + +

+
+
+ + {{template "views/partials/inprogress" .}} + + +
+ +
+
+ +
+ + + + + + + +
+ + +
+

Filter by type:

+
+ + + + +
+
+
+ + +
+ {{.Backends}} +
+ + + {{ if gt .AvailableBackends $numBackendsPerPage }} +
+
+ {{ if .PrevPage }} + + {{ end }} +
+ Page {{.CurrentPage}} of {{.TotalPages}} +
+ {{ if .NextPage }} + + {{ end }} +
+
+ {{ end }} + +
+ {{template "views/partials/footer" .}} +
+ + + + + \ No newline at end of file diff --git a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html index efec457b..229d7bf2 100644 --- a/core/http/views/partials/navbar.html +++ b/core/http/views/partials/navbar.html @@ -25,6 +25,9 @@ Models + + Backends + Chat @@ -57,6 +60,9 @@ Models + + Backends + Chat diff --git a/core/schema/backend.go b/core/schema/backend.go new file mode 100644 index 00000000..81bc79a6 --- /dev/null +++ b/core/schema/backend.go @@ -0,0 +1,7 @@ +package schema + +// BackendResponse represents the response for backend operations +type BackendResponse struct { + ID string `json:"id"` + StatusURL string `json:"status_url"` +} diff --git a/core/services/backends.go b/core/services/backends.go new file mode 100644 index 00000000..b83ed8dd --- /dev/null +++ b/core/services/backends.go @@ -0,0 +1,44 @@ +package services + +import ( + "github.com/mudler/LocalAI/core/gallery" + + "github.com/mudler/LocalAI/pkg/utils" + "github.com/rs/zerolog/log" +) + +func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend]) error { + utils.ResetDownloadTimers() + g.UpdateStatus(op.ID, &GalleryOpStatus{Message: "processing", Progress: 0}) + + // displayDownload displays the download progress + progressCallback := func(fileName string, current string, total string, percentage float64) { + g.UpdateStatus(op.ID, &GalleryOpStatus{Message: "processing", FileName: fileName, Progress: percentage, TotalFileSize: total, DownloadedFileSize: current}) + utils.DisplayDownloadFunction(fileName, current, total, percentage) + } + + var err error + if op.Delete { + err = gallery.DeleteBackendFromSystem(g.appConfig.BackendsPath, op.GalleryElementName) + g.modelLoader.DeleteExternalBackend(op.GalleryElementName) + } else { + log.Warn().Msgf("installing backend %s", op.GalleryElementName) + err = gallery.InstallBackendFromGallery(g.appConfig.BackendGalleries, op.GalleryElementName, g.appConfig.BackendsPath, progressCallback) + if err == 
nil { + err = gallery.RegisterBackends(g.appConfig.BackendsPath, g.modelLoader) + } + } + if err != nil { + log.Error().Err(err).Msgf("error installing backend %s", op.GalleryElementName) + return err + } + + g.UpdateStatus(op.ID, + &GalleryOpStatus{ + Deletion: op.Delete, + Processed: true, + GalleryElementName: op.GalleryElementName, + Message: "completed", + Progress: 100}) + return nil +} diff --git a/core/services/gallery.go b/core/services/gallery.go index f499d381..0c33d243 100644 --- a/core/services/gallery.go +++ b/core/services/gallery.go @@ -2,60 +2,48 @@ package services import ( "context" - "encoding/json" "fmt" - "os" - "path/filepath" "sync" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/gallery" - "github.com/mudler/LocalAI/pkg/startup" - "github.com/mudler/LocalAI/pkg/utils" - "gopkg.in/yaml.v2" + "github.com/mudler/LocalAI/pkg/model" ) type GalleryService struct { appConfig *config.ApplicationConfig sync.Mutex - C chan gallery.GalleryOp - statuses map[string]*gallery.GalleryOpStatus + ModelGalleryChannel chan GalleryOp[gallery.GalleryModel] + BackendGalleryChannel chan GalleryOp[gallery.GalleryBackend] + + modelLoader *model.ModelLoader + statuses map[string]*GalleryOpStatus } -func NewGalleryService(appConfig *config.ApplicationConfig) *GalleryService { +func NewGalleryService(appConfig *config.ApplicationConfig, ml *model.ModelLoader) *GalleryService { return &GalleryService{ - appConfig: appConfig, - C: make(chan gallery.GalleryOp), - statuses: make(map[string]*gallery.GalleryOpStatus), + appConfig: appConfig, + ModelGalleryChannel: make(chan GalleryOp[gallery.GalleryModel]), + BackendGalleryChannel: make(chan GalleryOp[gallery.GalleryBackend]), + modelLoader: ml, + statuses: make(map[string]*GalleryOpStatus), } } -func prepareModel(modelPath string, req gallery.GalleryModel, downloadStatus func(string, string, string, float64), enforceScan bool) error { - - config, err := gallery.GetGalleryConfigFromURL(req.URL, 
modelPath) - if err != nil { - return err - } - - config.Files = append(config.Files, req.AdditionalFiles...) - - return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus, enforceScan) -} - -func (g *GalleryService) UpdateStatus(s string, op *gallery.GalleryOpStatus) { +func (g *GalleryService) UpdateStatus(s string, op *GalleryOpStatus) { g.Lock() defer g.Unlock() g.statuses[s] = op } -func (g *GalleryService) GetStatus(s string) *gallery.GalleryOpStatus { +func (g *GalleryService) GetStatus(s string) *GalleryOpStatus { g.Lock() defer g.Unlock() return g.statuses[s] } -func (g *GalleryService) GetAllStatus() map[string]*gallery.GalleryOpStatus { +func (g *GalleryService) GetAllStatus() map[string]*GalleryOpStatus { g.Lock() defer g.Unlock() @@ -63,153 +51,35 @@ func (g *GalleryService) GetAllStatus() map[string]*gallery.GalleryOpStatus { } func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader) { + // updates the status with an error + var updateError func(id string, e error) + if !g.appConfig.OpaqueErrors { + updateError = func(id string, e error) { + g.UpdateStatus(id, &GalleryOpStatus{Error: e, Processed: true, Message: "error: " + e.Error()}) + } + } else { + updateError = func(id string, _ error) { + g.UpdateStatus(id, &GalleryOpStatus{Error: fmt.Errorf("an error occurred"), Processed: true}) + } + } + go func() { for { select { case <-c.Done(): return - case op := <-g.C: - utils.ResetDownloadTimers() - - g.UpdateStatus(op.Id, &gallery.GalleryOpStatus{Message: "processing", Progress: 0}) - - // updates the status with an error - var updateError func(e error) - if !g.appConfig.OpaqueErrors { - updateError = func(e error) { - g.UpdateStatus(op.Id, &gallery.GalleryOpStatus{Error: e, Processed: true, Message: "error: " + e.Error()}) - } - } else { - updateError = func(_ error) { - g.UpdateStatus(op.Id, &gallery.GalleryOpStatus{Error: fmt.Errorf("an error occurred"), Processed: true}) - } - } - - // 
displayDownload displays the download progress - progressCallback := func(fileName string, current string, total string, percentage float64) { - g.UpdateStatus(op.Id, &gallery.GalleryOpStatus{Message: "processing", FileName: fileName, Progress: percentage, TotalFileSize: total, DownloadedFileSize: current}) - utils.DisplayDownloadFunction(fileName, current, total, percentage) - } - - var err error - - // delete a model - if op.Delete { - modelConfig := &config.BackendConfig{} - - // Galleryname is the name of the model in this case - dat, err := os.ReadFile(filepath.Join(g.appConfig.ModelPath, op.GalleryModelName+".yaml")) - if err != nil { - updateError(err) - continue - } - err = yaml.Unmarshal(dat, modelConfig) - if err != nil { - updateError(err) - continue - } - - files := []string{} - // Remove the model from the config - if modelConfig.Model != "" { - files = append(files, modelConfig.ModelFileName()) - } - - if modelConfig.MMProj != "" { - files = append(files, modelConfig.MMProjFileName()) - } - - err = gallery.DeleteModelFromSystem(g.appConfig.ModelPath, op.GalleryModelName, files) - if err != nil { - updateError(err) - continue - } - } else { - // if the request contains a gallery name, we apply the gallery from the gallery list - if op.GalleryModelName != "" { - err = gallery.InstallModelFromGallery(op.Galleries, op.GalleryModelName, g.appConfig.ModelPath, op.Req, progressCallback, g.appConfig.EnforcePredownloadScans) - } else if op.ConfigURL != "" { - err = startup.InstallModels(op.Galleries, g.appConfig.ModelPath, g.appConfig.EnforcePredownloadScans, progressCallback, op.ConfigURL) - if err != nil { - updateError(err) - continue - } - err = cl.Preload(g.appConfig.ModelPath) - } else { - err = prepareModel(g.appConfig.ModelPath, op.Req, progressCallback, g.appConfig.EnforcePredownloadScans) - } - } - + case op := <-g.BackendGalleryChannel: + err := g.backendHandler(&op) if err != nil { - updateError(err) - continue + updateError(op.ID, err) } - // 
Reload models - err = cl.LoadBackendConfigsFromPath(g.appConfig.ModelPath) + case op := <-g.ModelGalleryChannel: + err := g.modelHandler(&op, cl) if err != nil { - updateError(err) - continue + updateError(op.ID, err) } - - err = cl.Preload(g.appConfig.ModelPath) - if err != nil { - updateError(err) - continue - } - - g.UpdateStatus(op.Id, - &gallery.GalleryOpStatus{ - Deletion: op.Delete, - Processed: true, - GalleryModelName: op.GalleryModelName, - Message: "completed", - Progress: 100}) } } }() } - -type galleryModel struct { - gallery.GalleryModel `yaml:",inline"` // https://github.com/go-yaml/yaml/issues/63 - ID string `json:"id"` -} - -func processRequests(modelPath string, enforceScan bool, galleries []config.Gallery, requests []galleryModel) error { - var err error - for _, r := range requests { - utils.ResetDownloadTimers() - if r.ID == "" { - err = prepareModel(modelPath, r.GalleryModel, utils.DisplayDownloadFunction, enforceScan) - - } else { - err = gallery.InstallModelFromGallery( - galleries, r.ID, modelPath, r.GalleryModel, utils.DisplayDownloadFunction, enforceScan) - } - } - return err -} - -func ApplyGalleryFromFile(modelPath, s string, enforceScan bool, galleries []config.Gallery) error { - dat, err := os.ReadFile(s) - if err != nil { - return err - } - var requests []galleryModel - - if err := yaml.Unmarshal(dat, &requests); err != nil { - return err - } - - return processRequests(modelPath, enforceScan, galleries, requests) -} - -func ApplyGalleryFromString(modelPath, s string, enforceScan bool, galleries []config.Gallery) error { - var requests []galleryModel - err := json.Unmarshal([]byte(s), &requests) - if err != nil { - return err - } - - return processRequests(modelPath, enforceScan, galleries, requests) -} diff --git a/core/services/models.go b/core/services/models.go new file mode 100644 index 00000000..b0b6ede3 --- /dev/null +++ b/core/services/models.go @@ -0,0 +1,153 @@ +package services + +import ( + "encoding/json" + "os" + 
"path/filepath" + + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/pkg/utils" + "gopkg.in/yaml.v2" +) + +func (g *GalleryService) modelHandler(op *GalleryOp[gallery.GalleryModel], cl *config.BackendConfigLoader) error { + utils.ResetDownloadTimers() + + g.UpdateStatus(op.ID, &GalleryOpStatus{Message: "processing", Progress: 0}) + + // displayDownload displays the download progress + progressCallback := func(fileName string, current string, total string, percentage float64) { + g.UpdateStatus(op.ID, &GalleryOpStatus{Message: "processing", FileName: fileName, Progress: percentage, TotalFileSize: total, DownloadedFileSize: current}) + utils.DisplayDownloadFunction(fileName, current, total, percentage) + } + + err := processModelOperation(op, g.appConfig.ModelPath, g.appConfig.EnforcePredownloadScans, progressCallback) + if err != nil { + return err + } + + // Reload models + err = cl.LoadBackendConfigsFromPath(g.appConfig.ModelPath) + if err != nil { + return err + } + + err = cl.Preload(g.appConfig.ModelPath) + if err != nil { + return err + } + + g.UpdateStatus(op.ID, + &GalleryOpStatus{ + Deletion: op.Delete, + Processed: true, + GalleryElementName: op.GalleryElementName, + Message: "completed", + Progress: 100}) + + return nil +} + +func prepareModel(modelPath string, req gallery.GalleryModel, downloadStatus func(string, string, string, float64), enforceScan bool) error { + config, err := gallery.GetGalleryConfigFromURL[gallery.ModelConfig](req.URL, modelPath) + if err != nil { + return err + } + + config.Files = append(config.Files, req.AdditionalFiles...) 
+ + return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus, enforceScan) +} + +type galleryModel struct { + gallery.GalleryModel `yaml:",inline"` // https://github.com/go-yaml/yaml/issues/63 + ID string `json:"id"` +} + +func processRequests(modelPath string, enforceScan bool, galleries []config.Gallery, requests []galleryModel) error { + var err error + for _, r := range requests { + utils.ResetDownloadTimers() + if r.ID == "" { + err = prepareModel(modelPath, r.GalleryModel, utils.DisplayDownloadFunction, enforceScan) + + } else { + err = gallery.InstallModelFromGallery( + galleries, r.ID, modelPath, r.GalleryModel, utils.DisplayDownloadFunction, enforceScan) + } + } + return err +} + +func ApplyGalleryFromFile(modelPath, s string, enforceScan bool, galleries []config.Gallery) error { + dat, err := os.ReadFile(s) + if err != nil { + return err + } + var requests []galleryModel + + if err := yaml.Unmarshal(dat, &requests); err != nil { + return err + } + + return processRequests(modelPath, enforceScan, galleries, requests) +} + +func ApplyGalleryFromString(modelPath, s string, enforceScan bool, galleries []config.Gallery) error { + var requests []galleryModel + err := json.Unmarshal([]byte(s), &requests) + if err != nil { + return err + } + + return processRequests(modelPath, enforceScan, galleries, requests) +} + +// processModelOperation handles the installation or deletion of a model +func processModelOperation( + op *GalleryOp[gallery.GalleryModel], + modelPath string, + enforcePredownloadScans bool, + progressCallback func(string, string, string, float64), +) error { + // delete a model + if op.Delete { + modelConfig := &config.BackendConfig{} + + // Galleryname is the name of the model in this case + dat, err := os.ReadFile(filepath.Join(modelPath, op.GalleryElementName+".yaml")) + if err != nil { + return err + } + err = yaml.Unmarshal(dat, modelConfig) + if err != nil { + return err + } + + files := []string{} + // Remove 
the model from the config + if modelConfig.Model != "" { + files = append(files, modelConfig.ModelFileName()) + } + + if modelConfig.MMProj != "" { + files = append(files, modelConfig.MMProjFileName()) + } + + return gallery.DeleteModelFromSystem(modelPath, op.GalleryElementName, files) + } + + // if the request contains a gallery name, we apply the gallery from the gallery list + if op.GalleryElementName != "" { + return gallery.InstallModelFromGallery(op.Galleries, op.GalleryElementName, modelPath, op.Req, progressCallback, enforcePredownloadScans) + // } else if op.ConfigURL != "" { + // err := startup.InstallModels(op.Galleries, modelPath, enforcePredownloadScans, progressCallback, op.ConfigURL) + // if err != nil { + // return err + // } + // return cl.Preload(modelPath) + } else { + return prepareModel(modelPath, op.Req, progressCallback, enforcePredownloadScans) + } +} diff --git a/core/services/operation.go b/core/services/operation.go new file mode 100644 index 00000000..e8d29f5d --- /dev/null +++ b/core/services/operation.go @@ -0,0 +1,81 @@ +package services + +import ( + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/pkg/xsync" +) + +type GalleryOp[T any] struct { + ID string + GalleryElementName string + Delete bool + + Req T + Galleries []config.Gallery +} + +type GalleryOpStatus struct { + Deletion bool `json:"deletion"` // Deletion is true if the operation is a deletion + FileName string `json:"file_name"` + Error error `json:"error"` + Processed bool `json:"processed"` + Message string `json:"message"` + Progress float64 `json:"progress"` + TotalFileSize string `json:"file_size"` + DownloadedFileSize string `json:"downloaded_size"` + GalleryElementName string `json:"gallery_element_name"` +} + +type OpCache struct { + status *xsync.SyncedMap[string, string] + galleryService *GalleryService +} + +func NewOpCache(galleryService *GalleryService) *OpCache { + return &OpCache{ + status: xsync.NewSyncedMap[string, string](), + 
galleryService: galleryService, + } +} + +func (m *OpCache) Set(key string, value string) { + m.status.Set(key, value) +} + +func (m *OpCache) Get(key string) string { + return m.status.Get(key) +} + +func (m *OpCache) DeleteUUID(uuid string) { + for _, k := range m.status.Keys() { + if m.status.Get(k) == uuid { + m.status.Delete(k) + } + } +} + +func (m *OpCache) Map() map[string]string { + return m.status.Map() +} + +func (m *OpCache) Exists(key string) bool { + return m.status.Exists(key) +} + +func (m *OpCache) GetStatus() (map[string]string, map[string]string) { + processingModelsData := m.Map() + + taskTypes := map[string]string{} + + for k, v := range processingModelsData { + status := m.galleryService.GetStatus(v) + taskTypes[k] = "Installation" + if status != nil && status.Deletion { + taskTypes[k] = "Deletion" + } else if status == nil { + taskTypes[k] = "Waiting" + } + } + + return processingModelsData, taskTypes +} diff --git a/docs/content/backends.md b/docs/content/backends.md new file mode 100644 index 00000000..9f522cdb --- /dev/null +++ b/docs/content/backends.md @@ -0,0 +1,118 @@ +--- +title: "Backends" +description: "Learn how to use, manage, and develop backends in LocalAI" +weight: 4 +--- + +# Backends + +LocalAI supports a variety of backends that can be used to run different types of AI models. There are core Backends which are included, and there are containerized applications that provide the runtime environment for specific model types, such as LLMs, diffusion models, or text-to-speech models. + +## Managing Backends in the UI + +The LocalAI web interface provides an intuitive way to manage your backends: + +1. Navigate to the "Backends" section in the navigation menu +2. Browse available backends from configured galleries +3. Use the search bar to find specific backends by name, description, or type +4. Filter backends by type using the quick filter buttons (LLM, Diffusion, TTS, Whisper) +5. 
Install or delete backends with a single click +6. Monitor installation progress in real-time + +Each backend card displays: +- Backend name and description +- Type of models it supports +- Installation status +- Action buttons (Install/Delete) +- Additional information via the info button + +## Backend Galleries + +Backend galleries are repositories that contain backend definitions. They work similarly to model galleries but are specifically for backends. + +### Adding a Backend Gallery + +You can add backend galleries by specifying the **Environment Variable** `LOCALAI_BACKEND_GALLERIES`: + +```bash +export LOCALAI_BACKEND_GALLERIES='[{"name":"my-gallery","url":"https://raw.githubusercontent.com/username/repo/main/backends"}]' +``` +The URL needs to point to a valid yaml file, for example: + +```yaml +- name: "test-backend" + uri: "quay.io/image/tests:localai-backend-test" + alias: "foo-backend" +``` + +Where URI is the path to an OCI container image. + +### Backend Gallery Structure + +A backend gallery is a collection of YAML files, each defining a backend. Here's an example structure: + +```yaml +# backends/llm-backend.yaml +name: "llm-backend" +description: "A backend for running LLM models" +uri: "quay.io/username/llm-backend:latest" +alias: "llm" +tags: + - "llm" + - "text-generation" +``` + +## Pre-installing Backends + +You can pre-install backends when starting LocalAI using the `LOCALAI_EXTERNAL_BACKENDS` environment variable: + +```bash +export LOCALAI_EXTERNAL_BACKENDS="llm-backend,diffusion-backend" +local-ai run +``` + +## Creating a Backend + +To create a new backend, you need to: + +1. Create a container image that implements the LocalAI backend interface +2. Define a backend YAML file +3. Publish your backend to a container registry + +### Backend Container Requirements + +Your backend container should: + +1. Implement the LocalAI backend interface (gRPC or HTTP) +2. Handle model loading and inference +3. Support the required model types +4. 
Include necessary dependencies +5. Have a top level `run.sh` file that will be used to run the backend +6. Pushed to a registry so can be used in a gallery + + +### Publishing Your Backend + +1. Build your container image: + ```bash + docker build -t quay.io/username/my-backend:latest . + ``` + +2. Push to a container registry: + ```bash + docker push quay.io/username/my-backend:latest + ``` + +3. Add your backend to a gallery: + - Create a YAML entry in your gallery repository + - Include the backend definition + - Make the gallery accessible via HTTP/HTTPS + +## Backend Types + +LocalAI supports various types of backends: + +- **LLM Backends**: For running language models +- **Diffusion Backends**: For image generation +- **TTS Backends**: For text-to-speech conversion +- **Whisper Backends**: For speech-to-text conversion \ No newline at end of file diff --git a/docs/content/docs/features/text-to-audio.md b/docs/content/docs/features/text-to-audio.md index f0bd2c0c..97df0fe1 100644 --- a/docs/content/docs/features/text-to-audio.md +++ b/docs/content/docs/features/text-to-audio.md @@ -169,11 +169,6 @@ curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ }' | aplay ``` -### Parler-tts - -`parler-tts`. It is possible to install and configure the model directly from the gallery. https://github.com/huggingface/parler-tts - - ## Using config files You can also use a `config-file` to specify TTS models and their parameters. 
diff --git a/gallery/index.yaml b/gallery/index.yaml index d7f70f9c..fd1ae49e 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1892,6 +1892,29 @@ - filename: meta-llama_Llama-4-Scout-17B-16E-Instruct-Q3_K_S.gguf sha256: 48dfc18d40691b4190b7fecf1f89b78cadc758c3a27a9e2a1cabd686fdb822e3 uri: huggingface://bartowski/meta-llama_Llama-4-Scout-17B-16E-Instruct-GGUF/meta-llama_Llama-4-Scout-17B-16E-Instruct-Q3_K_S.gguf +- name: "jina-reranker-v1-tiny-en" + url: "github:mudler/LocalAI/gallery/virtual.yaml@master" + tags: + - reranker + - gguf + - cpu + - gpu + - text-generation + - jina + urls: + - https://huggingface.co/mradermacher/jina-reranker-v1-tiny-en-GGUF + - https://huggingface.co/JinaAI/jina-reranker-v1-tiny-en-GGUF + description: | + This model is designed for blazing-fast reranking while maintaining competitive performance. What's more, it leverages the power of our JinaBERT model as its foundation. JinaBERT itself is a unique variant of the BERT architecture that supports the symmetric bidirectional variant of ALiBi. This allows jina-reranker-v1-tiny-en to process significantly longer sequences of text compared to other reranking models, up to an impressive 8,192 tokens. 
+ overrides: + f16: true + reranking: true + parameters: + model: jina-reranker-v1-tiny-en.f16.gguf + files: + - filename: jina-reranker-v1-tiny-en.f16.gguf + sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407 + uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf - &eurollm name: "eurollm-9b-instruct" icon: https://openeurollm.eu/_next/static/media/logo-dark.e7001867.svg diff --git a/go.mod b/go.mod index 57fc0947..ef137346 100644 --- a/go.mod +++ b/go.mod @@ -9,8 +9,10 @@ require ( github.com/GeertJohan/go.rice v1.0.3 github.com/Masterminds/sprig/v3 v3.3.0 github.com/alecthomas/kong v0.9.0 + github.com/census-instrumentation/opencensus-proto v0.4.1 github.com/charmbracelet/glamour v0.7.0 github.com/chasefleming/elem-go v0.26.0 + github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 github.com/containerd/containerd v1.7.19 github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2 github.com/elliotchance/orderedmap/v2 v2.2.0 @@ -23,9 +25,11 @@ require ( github.com/gofiber/template/html/v2 v2.1.2 github.com/gofiber/websocket/v2 v2.2.1 github.com/gofrs/flock v0.12.1 + github.com/golang/protobuf v1.5.4 github.com/google/go-containerregistry v0.19.2 github.com/google/uuid v1.6.0 github.com/gpustack/gguf-parser-go v0.17.0 + github.com/grpc-ecosystem/grpc-gateway v1.5.0 github.com/hpcloud/tail v1.0.0 github.com/ipfs/go-log v1.0.5 github.com/jaypipes/ghw v0.12.0 @@ -39,6 +43,7 @@ require ( github.com/nikolalohinski/gonja/v2 v2.3.2 github.com/onsi/ginkgo/v2 v2.22.2 github.com/onsi/gomega v1.36.2 + github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e github.com/otiai10/openaigo v1.7.0 github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 github.com/prometheus/client_golang v1.20.5 @@ -57,6 +62,7 @@ require ( go.opentelemetry.io/otel/exporters/prometheus v0.50.0 go.opentelemetry.io/otel/metric v1.34.0 go.opentelemetry.io/otel/sdk/metric v1.28.0 + google.golang.org/api 
v0.180.0 google.golang.org/grpc v1.67.1 google.golang.org/protobuf v1.36.5 gopkg.in/yaml.v2 v2.4.0 @@ -65,14 +71,22 @@ require ( ) require ( + cel.dev/expr v0.16.0 // indirect + cloud.google.com/go/auth v0.4.1 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect + cloud.google.com/go/compute/metadata v0.5.0 // indirect github.com/containerd/platforms v0.2.1 // indirect github.com/cpuguy83/dockercfg v0.3.2 // indirect github.com/daaku/go.zipexe v1.0.2 // indirect github.com/distribution/reference v0.6.0 // indirect github.com/dustin/go-humanize v1.0.1 // indirect + github.com/envoyproxy/protoc-gen-validate v1.1.0 // indirect github.com/fasthttp/websocket v1.5.8 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect + github.com/google/s2a-go v0.1.7 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect + github.com/googleapis/gax-go/v2 v2.12.4 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/magiconair/properties v1.8.7 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect @@ -82,7 +96,6 @@ require ( github.com/modern-go/reflect2 v1.0.2 // indirect github.com/morikuni/aec v1.0.0 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect - github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e // indirect github.com/pion/datachannel v1.5.10 // indirect github.com/pion/dtls/v2 v2.2.12 // indirect github.com/pion/dtls/v3 v3.0.4 // indirect @@ -112,7 +125,9 @@ require ( go.opentelemetry.io/auto/sdk v1.1.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 // indirect go.uber.org/mock v0.5.0 // indirect + golang.org/x/oauth2 v0.24.0 // indirect golang.org/x/time v0.8.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20241007155032-5fefd90f89a9 // indirect ) require ( diff --git a/go.sum b/go.sum index 5ebe9ac1..02586a80 100644 --- a/go.sum +++ b/go.sum 
@@ -1,7 +1,15 @@ +cel.dev/expr v0.16.0 h1:yloc84fytn4zmJX2GU3TkXGsaieaV7dQ057Qs4sIG2Y= +cel.dev/expr v0.16.0/go.mod h1:TRSuuV7DlVCE/uwv5QbAiW/v8l5O8C4eEPHeu7gf7Sg= cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.31.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.37.0/go.mod h1:TS1dMSSfndXH133OKGwekG838Om/cQT0BUHV3HcBgoo= +cloud.google.com/go/auth v0.4.1 h1:Z7YNIhlWRtrnKlZke7z3GMqzvuYzdc2z98F9D1NV5Hg= +cloud.google.com/go/auth v0.4.1/go.mod h1:QVBuVEKpCn4Zp58hzRGvL0tjRGU0YqdRTdCHM1IHnro= +cloud.google.com/go/auth/oauth2adapt v0.2.2 h1:+TTV8aXpjeChS9M+aTtN/TjdQnzJvmzKFt//oWu7HX4= +cloud.google.com/go/auth/oauth2adapt v0.2.2/go.mod h1:wcYjgpZI9+Yu7LyYBg4pqSiaRkfEK3GQcpb7C/uyF1Q= +cloud.google.com/go/compute/metadata v0.5.0 h1:Zr0eK8JbFv6+Wi4ilXAR8FJ3wyNdpxHKJNPos6LTZOY= +cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykWvVCfu7hQbF+9CWoiY= dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s= dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= dmitri.shuralyov.com/app/changes v0.0.0-20180602232624-0a106ad413e3/go.mod h1:Yl+fi1br7+Rr3LqpNJf1/uxUdtRUV+Tnj0o93V2B9MU= @@ -65,6 +73,8 @@ github.com/c-robinson/iplib v1.0.8/go.mod h1:i3LuuFL1hRT5gFpBRnEydzw8R6yhGkF4szN github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g= +github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= github.com/cespare/xxhash/v2 v2.3.0 
h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/charmbracelet/glamour v0.7.0 h1:2BtKGZ4iVJCDfMF229EzbeR1QRKLWztO9dMtjmqZSng= @@ -74,6 +84,8 @@ github.com/chasefleming/elem-go v0.26.0/go.mod h1:hz73qILBIKnTgOujnSMtEj20/epI+f github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 h1:N+3sFI5GUjRKBi+i0TxYVST9h4Ie192jJWpHvthBBgg= +github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= github.com/containerd/cgroups v0.0.0-20201119153540-4cbc285b3327/go.mod h1:ZJeTFisyysqgcCdecO57Dj79RfL0LNeGiFUqLYQRYLE= github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM= github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw= @@ -149,6 +161,8 @@ github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymF github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/envoyproxy/protoc-gen-validate v1.1.0 h1:tntQDh69XqOCOZsDz0lVJQez/2L6Uu2PdjCQwWCJ3bM= +github.com/envoyproxy/protoc-gen-validate v1.1.0/go.mod h1:sXRDRVmzEbkM7CVcM06s9shE/m23dg3wzjl0UWqJ2q4= github.com/fasthttp/websocket v1.5.8 h1:k5DpirKkftIF/w1R8ZzjSgARJrs54Je9YJK37DL/Ah8= github.com/fasthttp/websocket v1.5.8/go.mod h1:d08g8WaT6nnyvg9uMm8K9zMYyDjfKyj3170AtPRuVU0= github.com/felixge/httpsnoop v1.0.4 
h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= @@ -163,8 +177,6 @@ github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7z github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= -github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20240626202019-c118733a29ad h1:dQ93Vd6i25o+zH9vvnZ8mu7jtJQ6jT3D+zE3V8Q49n0= -github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20240626202019-c118733a29ad/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0= @@ -238,6 +250,8 @@ github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:W github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= @@ -266,12 +280,18 @@ github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OI github.com/google/pprof v0.0.0-20250208200701-d0013a598941 h1:43XjGa6toxLpeksjcxs1jIoIyr+vUfOqY2c6HB4bpoc= github.com/google/pprof 
v0.0.0-20250208200701-d0013a598941/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= +github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o= +github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfFxPRy3Bf7vr3h0cechB90XaQs= +github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0= github.com/googleapis/gax-go v2.0.0+incompatible/go.mod h1:SFVmujtThgffbyetf+mdk2eWhX2bMyUtNHzFKcPA9HY= github.com/googleapis/gax-go/v2 v2.0.3/go.mod h1:LLvjysVCY1JZeum8Z6l8qUty8fiNwE08qbEPm1M08qg= +github.com/googleapis/gax-go/v2 v2.12.4 h1:9gWcmF85Wvq4ryPFvGFaOgPIs1AQX0d0bcbGw4Z96qg= +github.com/googleapis/gax-go/v2 v2.12.4/go.mod h1:KYEYLorsnIGDi/rPC8b5TdlB9kbKoFubselGIoBMCwI= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c h1:7lF+Vz0LqiRidnzC1Oq86fpX1q/iEv2KJdrCtttYjT4= github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= @@ -485,8 +505,6 @@ github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o= github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= github.com/mudler/edgevpn v0.30.1 h1:4yyhNFJX62NpRp50sxiyZE5E/sdAqEZX+aE5Mv7QS60= github.com/mudler/edgevpn v0.30.1/go.mod h1:IAJkkJ0oH3rwsSGOGTFT4UBYFqYuD/QyaKzTLB3P/eU= 
-github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc h1:RxwneJl1VgvikiX28EkpdAyL4yQVnJMrbquKospjHyA= -github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig= github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82 h1:FVT07EI8njvsD4tC2Hw8Xhactp5AWhsQWD4oTeQuSAU= github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82/go.mod h1:Urp7LG5jylKoDq0663qeBh0pINGcRl35nXdKx82PSoU= github.com/mudler/water v0.0.0-20221010214108-8c7313014ce0 h1:Qh6ghkMgTu6siFbTf7L3IszJmshMhXxNL4V+t7IIA6w= @@ -907,6 +925,8 @@ golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAG golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= +golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/perf v0.0.0-20180704124530-6e6d33e29852/go.mod h1:JLpeXjPJfIyPr5TlbXLkXWLhP8nz10XfvxElABhCtcw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -1026,6 +1046,8 @@ gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o google.golang.org/api v0.0.0-20180910000450-7ca32eb868bf/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= google.golang.org/api v0.0.0-20181030000543-1d582fd0359e/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= google.golang.org/api v0.1.0/go.mod h1:UGEZY7KEX120AnNLIHFMKIo4obdJhkp2tPbaPlQx13Y= +google.golang.org/api v0.180.0 h1:M2D87Yo0rGBPWpo1orwfCLehUUL6E7/TYe5gvMQWDh4= 
+google.golang.org/api v0.180.0/go.mod h1:51AiyoEg1MJPSZ9zvklA8VnRILPXxn1iVen9v25XHAE= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.3.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -1038,6 +1060,7 @@ google.golang.org/genproto v0.0.0-20190306203927-b5d61aea6440/go.mod h1:VzzqZJRn google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda h1:wu/KJm9KJwpfHWhkkZGohVC6KRrc1oJNr4jwtQMOQXw= +google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda/go.mod h1:g2LLCvCeCSir/JJSWosk19BR4NVxGqHUC6rxIRsd7Aw= google.golang.org/genproto/googleapis/api v0.0.0-20241007155032-5fefd90f89a9 h1:T6rh4haD3GVYsgEfWExoCZA2o2FmbNyKpTuAxbEFPTg= google.golang.org/genproto/googleapis/api v0.0.0-20241007155032-5fefd90f89a9/go.mod h1:wp2WsuBYj6j8wUdo3ToZsdxxixbvQNAHqVJrTgi5E5M= google.golang.org/genproto/googleapis/rpc v0.0.0-20241007155032-5fefd90f89a9 h1:QCqS/PdaHTSWGvupk2F/ehwHtGc0/GYkT+3GAcR1CCc= diff --git a/main.go b/main.go index 8dda313d..380e8f5b 100644 --- a/main.go +++ b/main.go @@ -65,7 +65,7 @@ Some of the models compatible are: - Alpaca - StableLM (ggml quantized) -For a list of compatible models, check out: https://localai.io/model-compatibility/index.html +For a list of all available models for one-click install, check out: https://models.localai.io Copyright: Ettore Di Giacinto @@ -76,6 +76,7 @@ Version: ${version} kong.Vars{ "basepath": kong.ExpandPath("."), "galleries": `[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]`, + "backends": `[{"name":"localai", "url":"github:mudler/LocalAI/backend/index.yaml@master"}]`, "version": 
internal.PrintableVersion(), }, ) diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index cf24668c..6bc78aaa 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -305,7 +305,7 @@ func (ml *ModelLoader) grpcModel(backend string, autodetect bool, o *Options) fu } // Check if the backend is provided as external - if uri, ok := o.externalBackends[backend]; ok { + if uri, ok := ml.GetAllExternalBackends(o)[backend]; ok { log.Debug().Msgf("Loading external backend: %s", uri) // check if uri is a file or a address if fi, err := os.Stat(uri); err == nil { @@ -526,7 +526,7 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) { } // append externalBackends supplied by the user via the CLI - for _, b := range o.externalBackends { + for _, b := range ml.GetAllExternalBackends(o) { autoLoadBackends = append(autoLoadBackends, b) } diff --git a/pkg/model/loader.go b/pkg/model/loader.go index 5ecd7e90..e1543a2c 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -3,6 +3,7 @@ package model import ( "context" "fmt" + "maps" "os" "path/filepath" "strings" @@ -18,19 +19,21 @@ import ( // TODO: Split ModelLoader and TemplateLoader? Just to keep things more organized. Left together to share a mutex until I look into that. 
Would split if we separate directories for .bin/.yaml and .tmpl type ModelLoader struct { - ModelPath string - mu sync.Mutex - singletonLock sync.Mutex - singletonMode bool - models map[string]*Model - wd *WatchDog + ModelPath string + mu sync.Mutex + singletonLock sync.Mutex + singletonMode bool + models map[string]*Model + wd *WatchDog + externalBackends map[string]string } func NewModelLoader(modelPath string, singleActiveBackend bool) *ModelLoader { nml := &ModelLoader{ - ModelPath: modelPath, - models: make(map[string]*Model), - singletonMode: singleActiveBackend, + ModelPath: modelPath, + models: make(map[string]*Model), + singletonMode: singleActiveBackend, + externalBackends: make(map[string]string), } return nml @@ -44,6 +47,33 @@ func (ml *ModelLoader) ExistsInModelPath(s string) bool { return utils.ExistsInPath(ml.ModelPath, s) } +func (ml *ModelLoader) SetExternalBackend(name, uri string) { + ml.mu.Lock() + defer ml.mu.Unlock() + ml.externalBackends[name] = uri +} + +func (ml *ModelLoader) DeleteExternalBackend(name string) { + ml.mu.Lock() + defer ml.mu.Unlock() + delete(ml.externalBackends, name) +} + +func (ml *ModelLoader) GetExternalBackend(name string) string { + ml.mu.Lock() + defer ml.mu.Unlock() + return ml.externalBackends[name] +} + +func (ml *ModelLoader) GetAllExternalBackends(o *Options) map[string]string { + backends := make(map[string]string) + maps.Copy(backends, ml.externalBackends) + if o != nil { + maps.Copy(backends, o.externalBackends) + } + return backends +} + var knownFilesToSkip []string = []string{ "MODEL_CARD", "README", diff --git a/pkg/startup/backend_preload.go b/pkg/startup/backend_preload.go new file mode 100644 index 00000000..17403c0c --- /dev/null +++ b/pkg/startup/backend_preload.go @@ -0,0 +1,32 @@ +package startup + +import ( + "errors" + "fmt" + "strings" + + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/gallery" +) + +func InstallExternalBackends(galleries []config.Gallery, backendPath 
string, downloadStatus func(string, string, string, float64), backends ...string) error { + var errs error + for _, backend := range backends { + switch { + case strings.HasPrefix(backend, "oci://"): + backend = strings.TrimPrefix(backend, "oci://") + + if err := gallery.InstallBackend(backendPath, &gallery.GalleryBackend{ + URI: backend, + }, downloadStatus); err != nil { + errs = errors.Join(err, fmt.Errorf("error installing backend %s", backend)) + } + default: + err := gallery.InstallBackendFromGallery(galleries, backend, backendPath, downloadStatus) + if err != nil { + errs = errors.Join(err, fmt.Errorf("error installing backend %s", backend)) + } + } + } + return errs +} diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go index 0f598df5..93da3628 100644 --- a/pkg/startup/model_preload.go +++ b/pkg/startup/model_preload.go @@ -119,7 +119,7 @@ func installModel(galleries []config.Gallery, modelName, modelPath string, downl return err, false } - model := gallery.FindModel(models, modelName, modelPath) + model := gallery.FindGalleryElement(models, modelName, modelPath) if model == nil { return err, false } diff --git a/swagger/docs.go b/swagger/docs.go index 420610b3..125e93d7 100644 --- a/swagger/docs.go +++ b/swagger/docs.go @@ -246,7 +246,7 @@ const docTemplate = `{ "schema": { "type": "object", "additionalProperties": { - "$ref": "#/definitions/gallery.GalleryOpStatus" + "$ref": "#/definitions/services.GalleryOpStatus" } } } @@ -260,7 +260,7 @@ const docTemplate = `{ "200": { "description": "Response", "schema": { - "$ref": "#/definitions/gallery.GalleryOpStatus" + "$ref": "#/definitions/services.GalleryOpStatus" } } } @@ -987,7 +987,7 @@ const docTemplate = `{ } } }, - "gallery.GalleryOpStatus": { + "services.GalleryOpStatus": { "type": "object", "properties": { "deletion": { diff --git a/swagger/swagger.json b/swagger/swagger.json index 6f624474..5a769a08 100644 --- a/swagger/swagger.json +++ b/swagger/swagger.json @@ -239,7 +239,7 @@ 
"schema": { "type": "object", "additionalProperties": { - "$ref": "#/definitions/gallery.GalleryOpStatus" + "$ref": "#/definitions/services.GalleryOpStatus" } } } @@ -253,7 +253,7 @@ "200": { "description": "Response", "schema": { - "$ref": "#/definitions/gallery.GalleryOpStatus" + "$ref": "#/definitions/services.GalleryOpStatus" } } } @@ -980,7 +980,7 @@ } } }, - "gallery.GalleryOpStatus": { + "services.GalleryOpStatus": { "type": "object", "properties": { "deletion": { diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index f991e943..d566eac1 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -100,7 +100,7 @@ definitions: type: string type: array type: object - gallery.GalleryOpStatus: + services.GalleryOpStatus: properties: deletion: description: Deletion is true if the operation is a deletion @@ -891,7 +891,7 @@ paths: description: Response schema: additionalProperties: - $ref: '#/definitions/gallery.GalleryOpStatus' + $ref: '#/definitions/services.GalleryOpStatus' type: object summary: Returns all the jobs status progress /models/jobs/{uuid}: @@ -900,7 +900,7 @@ paths: "200": description: Response schema: - $ref: '#/definitions/gallery.GalleryOpStatus' + $ref: '#/definitions/services.GalleryOpStatus' summary: Returns the job status /system: get: diff --git a/tests/fixtures/backend-image/Dockerfile b/tests/fixtures/backend-image/Dockerfile new file mode 100644 index 00000000..56b67410 --- /dev/null +++ b/tests/fixtures/backend-image/Dockerfile @@ -0,0 +1,4 @@ +FROM scratch + +COPY src / +COPY run.sh / \ No newline at end of file diff --git a/tests/fixtures/backend-image/run.sh b/tests/fixtures/backend-image/run.sh new file mode 100644 index 00000000..e69de29b diff --git a/tests/fixtures/backend-image/src/.keep b/tests/fixtures/backend-image/src/.keep new file mode 100644 index 00000000..85d8d2ef --- /dev/null +++ b/tests/fixtures/backend-image/src/.keep @@ -0,0 +1,4 @@ +FROM SCRATCH + +COPY src / +COPY run.sh / \ No newline at end of file