feat: Add backend gallery (#5607)

* feat: Add backend gallery This PR add support to manage backends as similar to models. There is now available a backend gallery which can be used to install and remove extra backends. The backend gallery can be configured similarly as a model gallery, and API calls allows to install and remove new backends in runtime, and as well during the startup phase of LocalAI. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add backends docs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * wip: Backend Dockerfile for python backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat: drop extras images, build python backends separately Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixup on all backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * test CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Tweaks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop old backends leftovers Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixup CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Move dockerfile upper Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fix proto Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Feature dropped for consistency - we prefer model galleries Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add missing packages in the build image Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * exllama is ponly available on cublas Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * pin torch on chatterbox Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixups to index Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Debug CI * Install accellerators deps Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add target arch * Add cuda minor version Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Use self-hosted runners Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * ci: use quay for test images Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups for vllm and chatterbox Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Small fixups on CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chatterbox is only available for nvidia Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Simplify CI builds Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Adapt test, use qwen3 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore(model gallery): add jina-reranker-v1-tiny-en-gguf Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(gguf-parser): recover from potential panics that can happen while reading ggufs with gguf-parser Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Use reranker from llama.cpp in AIO images Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Limit concurrent jobs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-17 00:05:00 +00:00 · 2025-06-15 14:56:52 +02:00 · 2025-06-15 14:56:52 +02:00 · 2d64269763
commit 2d64269763
parent a7a6020328
114 changed files with 3996 additions and 1382 deletions
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@ -61,10 +61,6 @@ updates:
    directory: "/backend/python/openvoice"
    schedule:
      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/parler-tts"
-    schedule:
-      interval: "weekly"
  - package-ecosystem: "pip"
    directory: "/backend/python/rerankers"
    schedule:
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@ -9,13 +9,12 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  extras-image-build:
+  image-build:
    uses: ./.github/workflows/image_build.yml
    with:
      tag-latest: ${{ matrix.tag-latest }}
      tag-suffix: ${{ matrix.tag-suffix }}
      ffmpeg: ${{ matrix.ffmpeg }}
-      image-type: ${{ matrix.image-type }}
      build-type: ${{ matrix.build-type }}
      cuda-major-version: ${{ matrix.cuda-major-version }}
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
@ -36,16 +35,6 @@ jobs:
      fail-fast: false
      matrix:
        include:
-          # This is basically covered by the AIO test
-          # - build-type: ''
-          #   platforms: 'linux/amd64'
-          #   tag-latest: 'false'
-          #   tag-suffix: '-ffmpeg'
-          #   ffmpeg: 'true'
-          #   image-type: 'extras'
-          #   runs-on: 'arc-runner-set'
-          #   base-image: "ubuntu:22.04"
-          #   makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
@ -53,8 +42,7 @@ jobs:
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12-ffmpeg'
            ffmpeg: 'true'
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
+            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'hipblas'
@ -62,10 +50,9 @@ jobs:
            tag-latest: 'false'
            tag-suffix: '-hipblas'
            ffmpeg: 'false'
-            image-type: 'extras'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            grpc-base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
+            runs-on: 'ubuntu-latest'
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'sycl_f16'
            platforms: 'linux/amd64'
@ -74,77 +61,13 @@ jobs:
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: 'sycl-f16-ffmpeg'
            ffmpeg: 'true'
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
+            runs-on: 'ubuntu-latest'
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'vulkan'
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-vulkan-ffmpeg-core'
            ffmpeg: 'true'
-            image-type: 'core'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=4 --output-sync=target"
-  # core-image-build:
-  #   uses: ./.github/workflows/image_build.yml
-  #   with:
-  #     tag-latest: ${{ matrix.tag-latest }}
-  #     tag-suffix: ${{ matrix.tag-suffix }}
-  #     ffmpeg: ${{ matrix.ffmpeg }}
-  #     image-type: ${{ matrix.image-type }}
-  #     build-type: ${{ matrix.build-type }}
-  #     cuda-major-version: ${{ matrix.cuda-major-version }}
-  #     cuda-minor-version: ${{ matrix.cuda-minor-version }}
-  #     platforms: ${{ matrix.platforms }}
-  #     runs-on: ${{ matrix.runs-on }}
-  #     base-image: ${{ matrix.base-image }}
-  #     grpc-base-image: ${{ matrix.grpc-base-image }}
-  #     makeflags: ${{ matrix.makeflags }}
-  #   secrets:
-  #     dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
-  #     dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-  #     quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-  #   strategy:
-  #     matrix:
-  #       include:
-          # - build-type: ''
-          #   platforms: 'linux/amd64'
-          #   tag-latest: 'false'
-          #   tag-suffix: '-ffmpeg-core'
-          #   ffmpeg: 'true'
-          #   image-type: 'core'
-          #   runs-on: 'ubuntu-latest'
-          #   base-image: "ubuntu:22.04"
-          #   makeflags: "--jobs=4 --output-sync=target"
-          # - build-type: 'sycl_f16'
-          #   platforms: 'linux/amd64'
-          #   tag-latest: 'false'
-          #   base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-          #   grpc-base-image: "ubuntu:22.04"
-          #   tag-suffix: 'sycl-f16-ffmpeg-core'
-          #   ffmpeg: 'true'
-          #   image-type: 'core'
-          #   runs-on: 'arc-runner-set'
-          #   makeflags: "--jobs=3 --output-sync=target"
-          # - build-type: 'cublas'
-          #   cuda-major-version: "12"
-          #   cuda-minor-version: "0"
-          #   platforms: 'linux/amd64'
-          #   tag-latest: 'false'
-          #   tag-suffix: '-cublas-cuda12-ffmpeg-core'
-          #   ffmpeg: 'true'
-          #   image-type: 'core'
-          #   runs-on: 'ubuntu-latest'
-          #   base-image: "ubuntu:22.04"
-          #   makeflags: "--jobs=4 --output-sync=target"
-          # - build-type: 'vulkan'
-          #   platforms: 'linux/amd64'
-          #   tag-latest: 'false'
-          #   tag-suffix: '-vulkan-ffmpeg-core'
-          #   ffmpeg: 'true'
-          #   image-type: 'core'
-          #   runs-on: 'ubuntu-latest'
-          #   base-image: "ubuntu:22.04"
-          #   makeflags: "--jobs=4 --output-sync=target"
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@ -37,24 +37,9 @@ jobs:
      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
    strategy:
-      # Pushing with all jobs in parallel
-      # eats the bandwidth of all the nodes
      max-parallel: 2
      matrix:
        include:
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-hipblas-extras'
-            ffmpeg: 'true'
-            image-type: 'extras'
-            aio: "-aio-gpu-hipblas"
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            latest-image: 'latest-gpu-hipblas-extras'
-            latest-image-aio: 'latest-aio-gpu-hipblas'
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'hipblas'
            platforms: 'linux/amd64'
            tag-latest: 'false'
@ -66,112 +51,8 @@ jobs:
            runs-on: 'arc-runner-set'
            makeflags: "--jobs=3 --output-sync=target"
            latest-image: 'latest-gpu-hipblas'
-  self-hosted-jobs:
-    uses: ./.github/workflows/image_build.yml
-    with:
-      tag-latest: ${{ matrix.tag-latest }}
-      tag-suffix: ${{ matrix.tag-suffix }}
-      ffmpeg: ${{ matrix.ffmpeg }}
-      image-type: ${{ matrix.image-type }}
-      build-type: ${{ matrix.build-type }}
-      cuda-major-version: ${{ matrix.cuda-major-version }}
-      cuda-minor-version: ${{ matrix.cuda-minor-version }}
-      platforms: ${{ matrix.platforms }}
-      runs-on: ${{ matrix.runs-on }}
-      base-image: ${{ matrix.base-image }}
-      grpc-base-image: ${{ matrix.grpc-base-image }}
-      aio: ${{ matrix.aio }}
-      makeflags: ${{ matrix.makeflags }}
-      latest-image: ${{ matrix.latest-image }}
-      latest-image-aio: ${{ matrix.latest-image-aio }}
-    secrets:
-      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
-      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
-      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-    strategy:
-      # Pushing with all jobs in parallel
-      # eats the bandwidth of all the nodes
-      max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
-      matrix:
-        include:
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-cublas-cuda11-extras'
-            ffmpeg: 'true'
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            aio: "-aio-gpu-nvidia-cuda-11"
-            latest-image: 'latest-gpu-nvidia-cuda-11-extras'
-            latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-cublas-cuda12-extras'
-            ffmpeg: 'true'
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            aio: "-aio-gpu-nvidia-cuda-12"
-            latest-image: 'latest-gpu-nvidia-cuda-12-extras'
-            latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'sycl_f16'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f16-extras'
-            ffmpeg: 'true'
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            aio: "-aio-gpu-intel-f16"
-            latest-image: 'latest-gpu-intel-f16-extras'
-            latest-image-aio: 'latest-aio-gpu-intel-f16'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'sycl_f32'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f32-extras'
-            ffmpeg: 'true'
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            aio: "-aio-gpu-intel-f32"
-            latest-image: 'latest-gpu-intel-f32-extras'
-            latest-image-aio: 'latest-aio-gpu-intel-f32'
-            makeflags: "--jobs=3 --output-sync=target"
-          # Core images
-          - build-type: 'sycl_f16'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f16'
-            ffmpeg: 'true'
-            image-type: 'core'
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-            latest-image: 'latest-gpu-intel-f16'
-          - build-type: 'sycl_f32'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f32'
-            ffmpeg: 'true'
-            image-type: 'core'
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-            latest-image: 'latest-gpu-intel-f32'
+            aio: "-aio-gpu-hipblas"
+            latest-image-aio: 'latest-aio-gpu-hipblas'

  core-image-build:
    uses: ./.github/workflows/image_build.yml
@ -226,7 +107,9 @@ jobs:
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=4 --output-sync=target"
            skip-drivers: 'false'
-            latest-image: 'latest-gpu-nvidia-cuda-12'
+            latest-image: 'latest-gpu-nvidia-cuda-11'
+            aio: "-aio-gpu-nvidia-cuda-11"
+            latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
@ -240,6 +123,8 @@ jobs:
            skip-drivers: 'false'
            makeflags: "--jobs=4 --output-sync=target"
            latest-image: 'latest-gpu-nvidia-cuda-12'
+            aio: "-aio-gpu-nvidia-cuda-12"
+            latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
          - build-type: 'vulkan'
            platforms: 'linux/amd64'
            tag-latest: 'false'
@ -251,6 +136,35 @@ jobs:
            skip-drivers: 'false'
            makeflags: "--jobs=4 --output-sync=target"
            latest-image: 'latest-gpu-vulkan'
+            aio: "-aio-gpu-vulkan"
+            latest-image-aio: 'latest-aio-gpu-vulkan'
+          - build-type: 'sycl_f16'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            grpc-base-image: "ubuntu:22.04"
+            tag-suffix: '-sycl-f16'
+            ffmpeg: 'true'
+            image-type: 'core'
+            runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
+            latest-image: 'latest-gpu-intel-f16'
+            aio: "-aio-gpu-intel-f16"
+            latest-image-aio: 'latest-aio-gpu-intel-f16'
+          - build-type: 'sycl_f32'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            grpc-base-image: "ubuntu:22.04"
+            tag-suffix: '-sycl-f32'
+            ffmpeg: 'true'
+            image-type: 'core'
+            runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
+            latest-image: 'latest-gpu-intel-f32'
+            aio: "-aio-gpu-intel-f32"
+            latest-image-aio: 'latest-aio-gpu-intel-f32'
+
  gh-runner:
    uses: ./.github/workflows/image_build.yml
    with:
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@ -53,10 +53,6 @@ on:
        description: 'Skip drivers by default'
        default: 'false'
        type: string
-      image-type:
-        description: 'Image type'
-        default: ''
-        type: string
      runs-on:
        description: 'Runs on'
        required: true
@ -159,11 +155,11 @@ jobs:
        uses: docker/metadata-action@v5
        with:
          images: |
-            ttl.sh/localai-ci-pr-${{ github.event.number }}
+            quay.io/go-skynet/ci-tests
          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-            type=sha
+            type=ref,event=branch,suffix=localai${{ github.event.number }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
+            type=semver,pattern={{raw}},suffix=localai${{ github.event.number }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
+            type=sha,suffix=localai${{ github.event.number }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
          flavor: |
            latest=${{ inputs.tag-latest }}
            suffix=${{ inputs.tag-suffix }}
@ -211,7 +207,7 @@ jobs:
          password: ${{ secrets.dockerPassword }}

      - name: Login to DockerHub
-        if: github.event_name != 'pull_request'
+        # if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: quay.io
@ -232,7 +228,6 @@ jobs:
            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
            FFMPEG=${{ inputs.ffmpeg }}
-            IMAGE_TYPE=${{ inputs.image-type }}
            BASE_IMAGE=${{ inputs.base-image }}
            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
@ -261,7 +256,6 @@ jobs:
            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
            FFMPEG=${{ inputs.ffmpeg }}
-            IMAGE_TYPE=${{ inputs.image-type }}
            BASE_IMAGE=${{ inputs.base-image }}
            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
@ -275,10 +269,6 @@ jobs:
          push: true
          tags: ${{ steps.meta_pull_request.outputs.tags }}
          labels: ${{ steps.meta_pull_request.outputs.labels }}
-      - name: Testing image
-        if: github.event_name == 'pull_request'
-        run: |
-          echo "Image is available at ttl.sh/localai-ci-pr-${{ github.event.number }}:${{ steps.meta_pull_request.outputs.version }}" >> $GITHUB_STEP_SUMMARY
 ## End testing image
      - name: Build and push AIO image
        if: inputs.aio != ''
--- a/.github/workflows/python_backend.yml
+++ b/.github/workflows/python_backend.yml
@ -0,0 +1,457 @@
+---
+name: 'build python backend container images'
+
+on:
+  push:
+    branches:
+      - master
+    tags:
+      - '*'
+  pull_request:
+
+concurrency:
+  group: ci-backends-${{ github.head_ref || github.ref }}-${{ github.repository }}
+  cancel-in-progress: true
+
+jobs:
+  backend-jobs:
+    uses: ./.github/workflows/python_backend_build.yml
+    with:
+      tag-latest: ${{ matrix.tag-latest }}
+      tag-suffix: ${{ matrix.tag-suffix }}
+      build-type: ${{ matrix.build-type }}
+      cuda-major-version: ${{ matrix.cuda-major-version }}
+      cuda-minor-version: ${{ matrix.cuda-minor-version }}
+      platforms: ${{ matrix.platforms }}
+      runs-on: ${{ matrix.runs-on }}
+      base-image: ${{ matrix.base-image }}
+      backend: ${{ matrix.backend }}
+      latest-image: ${{ matrix.latest-image }}
+    secrets:
+      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
+      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
+      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
+      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
+    strategy:
+      fail-fast: false
+      max-parallel: ${{ github.event_name != 'pull_request' && 6 || 4 }}
+      matrix:
+        include:
+          # CUDA 11 builds
+          - build-type: 'cublas'
+            cuda-major-version: "11"
+            cuda-minor-version: "7"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-11-rerankers'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            backend: "rerankers"
+            latest-image: 'latest-gpu-nvidia-cuda-11-rerankers'
+          - build-type: 'cublas'
+            cuda-major-version: "11"
+            cuda-minor-version: "7"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-11-vllm'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            backend: "vllm"
+            latest-image: 'latest-gpu-nvidia-cuda-11-vllm'
+          - build-type: 'cublas'
+            cuda-major-version: "11"
+            cuda-minor-version: "7"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-11-transformers'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            backend: "transformers"
+            latest-image: 'latest-gpu-nvidia-cuda-11-transformers'
+          - build-type: 'cublas'
+            cuda-major-version: "11"
+            cuda-minor-version: "7"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-11-diffusers'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            backend: "diffusers"
+            latest-image: 'latest-gpu-nvidia-cuda-11-diffusers'
+          # CUDA 11 additional backends
+          - build-type: 'cublas'
+            cuda-major-version: "11"
+            cuda-minor-version: "7"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-11-kokoro'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            backend: "kokoro"
+            latest-image: 'latest-gpu-nvidia-cuda-11-kokoro'
+          - build-type: 'cublas'
+            cuda-major-version: "11"
+            cuda-minor-version: "7"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            backend: "faster-whisper"
+            latest-image: 'latest-gpu-nvidia-cuda-11-faster-whisper'
+          - build-type: 'cublas'
+            cuda-major-version: "11"
+            cuda-minor-version: "7"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-11-coqui'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            backend: "coqui"
+            latest-image: 'latest-gpu-nvidia-cuda-11-coqui'
+          - build-type: 'cublas'
+            cuda-major-version: "11"
+            cuda-minor-version: "7"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-11-bark'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            backend: "bark"
+            latest-image: 'latest-gpu-nvidia-cuda-11-bark'
+          - build-type: 'cublas'
+            cuda-major-version: "11"
+            cuda-minor-version: "7"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-11-chatterbox'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            backend: "chatterbox"
+            latest-image: 'latest-gpu-nvidia-cuda-11-chatterbox'
+          # CUDA 12 builds
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            backend: "rerankers"
+            latest-image: 'latest-gpu-nvidia-cuda-12-rerankers'
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-12-vllm'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            backend: "vllm"
+            latest-image: 'latest-gpu-nvidia-cuda-12-vllm'
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-12-transformers'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            backend: "transformers"
+            latest-image: 'latest-gpu-nvidia-cuda-12-transformers'
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            backend: "diffusers"
+            latest-image: 'latest-gpu-nvidia-cuda-12-diffusers'
+          # CUDA 12 additional backends
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            backend: "kokoro"
+            latest-image: 'latest-gpu-nvidia-cuda-12-kokoro'
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            backend: "faster-whisper"
+            latest-image: 'latest-gpu-nvidia-cuda-12-faster-whisper'
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-12-coqui'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            backend: "coqui"
+            latest-image: 'latest-gpu-nvidia-cuda-12-coqui'
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-12-bark'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            backend: "bark"
+            latest-image: 'latest-gpu-nvidia-cuda-12-bark'
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            backend: "chatterbox"
+            latest-image: 'latest-gpu-nvidia-cuda-12-chatterbox'
+          # hipblas builds
+          - build-type: 'hipblas'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-rocm-hipblas-rerankers'
+            runs-on: 'arc-runner-set'
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            backend: "rerankers"
+            latest-image: 'latest-gpu-rocm-hipblas-rerankers'
+          - build-type: 'hipblas'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-rocm-hipblas-vllm'
+            runs-on: 'arc-runner-set'
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            backend: "vllm"
+            latest-image: 'latest-gpu-rocm-hipblas-vllm'
+          - build-type: 'hipblas'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-rocm-hipblas-transformers'
+            runs-on: 'arc-runner-set'
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            backend: "transformers"
+            latest-image: 'latest-gpu-rocm-hipblas-transformers'
+          - build-type: 'hipblas'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-rocm-hipblas-diffusers'
+            runs-on: 'arc-runner-set'
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            backend: "diffusers"
+            latest-image: 'latest-gpu-rocm-hipblas-diffusers'
+          # ROCm additional backends
+          - build-type: 'hipblas'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-rocm-hipblas-kokoro'
+            runs-on: 'arc-runner-set'
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            backend: "kokoro"
+            latest-image: 'latest-gpu-rocm-hipblas-kokoro'
+          - build-type: 'hipblas'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
+            runs-on: 'arc-runner-set'
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            backend: "faster-whisper"
+            latest-image: 'latest-gpu-rocm-hipblas-faster-whisper'
+          - build-type: 'hipblas'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-rocm-hipblas-coqui'
+            runs-on: 'arc-runner-set'
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            backend: "coqui"
+            latest-image: 'latest-gpu-rocm-hipblas-coqui'
+          - build-type: 'hipblas'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-rocm-hipblas-bark'
+            runs-on: 'arc-runner-set'
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            backend: "bark"
+            latest-image: 'latest-gpu-rocm-hipblas-bark'
+          # sycl builds
+          - build-type: 'sycl_f32'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-intel-sycl-f32-rerankers'
+            runs-on: 'arc-runner-set'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            backend: "rerankers"
+            latest-image: 'latest-gpu-intel-sycl-f32-rerankers'
+          - build-type: 'sycl_f16'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-intel-sycl-f16-rerankers'
+            runs-on: 'arc-runner-set'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            backend: "rerankers"
+            latest-image: 'latest-gpu-intel-sycl-f16-rerankers'
+          - build-type: 'sycl_f32'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-intel-sycl-f32-vllm'
+            runs-on: 'arc-runner-set'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            backend: "vllm"
+            latest-image: 'latest-gpu-intel-sycl-f32-vllm'
+          - build-type: 'sycl_f16'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-intel-sycl-f16-vllm'
+            runs-on: 'arc-runner-set'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            backend: "vllm"
+            latest-image: 'latest-gpu-intel-sycl-f16-vllm'
+          - build-type: 'sycl_f32'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-intel-sycl-f32-transformers'
+            runs-on: 'arc-runner-set'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            backend: "transformers"
+            latest-image: 'latest-gpu-intel-sycl-f32-transformers'
+          - build-type: 'sycl_f16'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-intel-sycl-f16-transformers'
+            runs-on: 'arc-runner-set'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            backend: "transformers"
+            latest-image: 'latest-gpu-intel-sycl-f16-transformers'
+          - build-type: 'sycl_f32'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-intel-sycl-f32-diffusers'
+            runs-on: 'arc-runner-set'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            backend: "diffusers"
+            latest-image: 'latest-gpu-intel-sycl-f32-diffusers'
+          # SYCL additional backends
+          - build-type: 'sycl_f32'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-intel-sycl-f32-kokoro'
+            runs-on: 'arc-runner-set'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            backend: "kokoro"
+            latest-image: 'latest-gpu-intel-sycl-f32-kokoro'
+          - build-type: 'sycl_f16'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-intel-sycl-f16-kokoro'
+            runs-on: 'arc-runner-set'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            backend: "kokoro"
+            latest-image: 'latest-gpu-intel-sycl-f16-kokoro'
+          - build-type: 'sycl_f32'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-intel-sycl-f32-faster-whisper'
+            runs-on: 'arc-runner-set'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            backend: "faster-whisper"
+            latest-image: 'latest-gpu-intel-sycl-f32-faster-whisper'
+          - build-type: 'sycl_f16'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-intel-sycl-f16-faster-whisper'
+            runs-on: 'arc-runner-set'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            backend: "faster-whisper"
+            latest-image: 'latest-gpu-intel-sycl-f16-faster-whisper'
+          - build-type: 'sycl_f32'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-intel-sycl-f32-coqui'
+            runs-on: 'arc-runner-set'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            backend: "coqui"
+            latest-image: 'latest-gpu-intel-sycl-f32-coqui'
+          - build-type: 'sycl_f16'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-intel-sycl-f16-coqui'
+            runs-on: 'arc-runner-set'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            backend: "coqui"
+            latest-image: 'latest-gpu-intel-sycl-f16-coqui'
+          - build-type: 'sycl_f32'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-intel-sycl-f32-bark'
+            runs-on: 'arc-runner-set'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            backend: "bark"
+            latest-image: 'latest-gpu-intel-sycl-f32-bark'
+          - build-type: 'sycl_f16'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-intel-sycl-f16-bark'
+            runs-on: 'arc-runner-set'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            backend: "bark"
+            latest-image: 'latest-gpu-intel-sycl-f16-bark'
--- a/.github/workflows/python_backend_build.yml
+++ b/.github/workflows/python_backend_build.yml
@ -0,0 +1,198 @@
+---
+name: 'build python backend container images (reusable)'
+
+on:
+  workflow_call:
+    inputs:
+      base-image:
+        description: 'Base image'
+        required: true
+        type: string
+      build-type:
+        description: 'Build type'
+        default: ''
+        type: string
+      cuda-major-version:
+        description: 'CUDA major version'
+        default: "12"
+        type: string
+      cuda-minor-version:
+        description: 'CUDA minor version'
+        default: "1"
+        type: string
+      platforms:
+        description: 'Platforms'
+        default: ''
+        type: string
+      tag-latest:
+        description: 'Tag latest'
+        default: ''
+        type: string
+      latest-image:
+        description: 'Tag latest'
+        default: ''
+        type: string
+      tag-suffix:
+        description: 'Tag suffix'
+        default: ''
+        type: string
+      runs-on:
+        description: 'Runs on'
+        required: true
+        default: ''
+        type: string
+      backend:
+        description: 'Backend to build'
+        required: true
+        type: string
+    secrets:
+      dockerUsername:
+        required: true
+      dockerPassword:
+        required: true
+      quayUsername:
+        required: true
+      quayPassword:
+        required: true
+
+jobs:
+  reusable_python_backend-build:
+    runs-on: ${{ inputs.runs-on }}
+    steps:
+      - name: Force Install GIT latest
+        run: |
+          sudo apt-get update \
+          && sudo apt-get install -y software-properties-common \
+          && sudo apt-get update \
+          && sudo add-apt-repository -y ppa:git-core/ppa \
+          && sudo apt-get update \
+          && sudo apt-get install -y git
+
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Release space from worker
+        if: inputs.runs-on == 'ubuntu-latest'
+        run: |
+          echo "Listing top largest packages"
+          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
+          head -n 30 <<< "${pkgs}"
+          echo
+          df -h
+          echo
+          sudo apt-get autoremove -y
+          sudo apt-get clean
+          echo
+          df -h
+
+      - name: Docker meta
+        id: meta
+        if: github.event_name != 'pull_request'
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            quay.io/go-skynet/local-ai-backends
+            localai/localai-backends
+          tags: |
+            type=ref,event=branch
+            type=semver,pattern={{raw}}
+            type=sha
+          flavor: |
+            latest=${{ inputs.tag-latest }}
+            suffix=${{ inputs.tag-suffix }}
+
+      - name: Docker meta for PR
+        id: meta_pull_request
+        if: github.event_name == 'pull_request'
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            quay.io/go-skynet/ci-tests
+          tags: |
+            type=ref,event=branch,suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
+            type=semver,pattern={{raw}},suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
+            type=sha,suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
+          flavor: |
+            latest=${{ inputs.tag-latest }}
+            suffix=${{ inputs.tag-suffix }}
+## End testing image
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@master
+        with:
+          platforms: all
+
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@master
+
+      - name: Login to DockerHub
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.dockerUsername }}
+          password: ${{ secrets.dockerPassword }}
+
+      - name: Login to Quay.io
+        # if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          registry: quay.io
+          username: ${{ secrets.quayUsername }}
+          password: ${{ secrets.quayPassword }}
+
+      - name: Build and push
+        uses: docker/build-push-action@v6
+        if: github.event_name != 'pull_request'
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          build-args: |
+            BUILD_TYPE=${{ inputs.build-type }}
+            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
+            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
+            BASE_IMAGE=${{ inputs.base-image }}
+            BACKEND=${{ inputs.backend }}
+          context: ./backend
+          file: ./backend/Dockerfile.python
+          cache-from: type=gha
+          platforms: ${{ inputs.platforms }}
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+
+      - name: Build and push (PR)
+        uses: docker/build-push-action@v6
+        if: github.event_name == 'pull_request'
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          build-args: |
+            BUILD_TYPE=${{ inputs.build-type }}
+            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
+            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
+            BASE_IMAGE=${{ inputs.base-image }}
+            BACKEND=${{ inputs.backend }}
+          context: ./backend
+          file: ./backend/Dockerfile.python
+          cache-from: type=gha
+          platforms: ${{ inputs.platforms }}
+          push: true
+          tags: ${{ steps.meta_pull_request.outputs.tags }}
+          labels: ${{ steps.meta_pull_request.outputs.labels }}
+
+      - name: Cleanup
+        run: |
+          docker builder prune -f
+          docker system prune --force --volumes --all
+
+      - name: Latest tag
+        if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
+        run: |
+          docker pull localai/localai-backends:${{ steps.meta.outputs.version }}
+          docker tag localai/localai-backends:${{ steps.meta.outputs.version }} localai/localai-backends:${{ inputs.latest-image }}
+          docker push localai/localai-backends:${{ inputs.latest-image }}
+          docker pull quay.io/go-skynet/local-ai-backends:${{ steps.meta.outputs.version }}
+          docker tag quay.io/go-skynet/local-ai-backends:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai-backends:${{ inputs.latest-image }}
+          docker push quay.io/go-skynet/local-ai-backends:${{ inputs.latest-image }}
+
+      - name: job summary
+        run: |
+          echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY 
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -188,7 +188,7 @@ jobs:
          PATH="$PATH:$HOME/go/bin" make protogen-go
      - name: Build images
        run: |
-          docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
+          docker build --build-arg FFMPEG=true --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
          BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
      - name: Test
        run: |