mirror of
https://github.com/mudler/LocalAI.git
synced 2025-06-17 08:15:00 +00:00

* feat: Add backend gallery This PR adds support for managing backends similarly to models. A backend gallery is now available, which can be used to install and remove extra backends. The backend gallery can be configured similarly to a model gallery, and API calls allow installing and removing backends at runtime, as well as during the startup phase of LocalAI. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add backends docs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * wip: Backend Dockerfile for python backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat: drop extras images, build python backends separately Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixup on all backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * test CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Tweaks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop old backends leftovers Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixup CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Move dockerfile upper Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fix proto Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Feature dropped for consistency - we prefer model galleries Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add missing packages in the build image Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * exllama is only available on cublas Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * pin torch on chatterbox Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixups to index Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Debug CI * Install accelerators deps Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add target arch * Add cuda minor version Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Use self-hosted runners Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 
* ci: use quay for test images Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups for vllm and chatterbox Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Small fixups on CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chatterbox is only available for nvidia Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Simplify CI builds Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Adapt test, use qwen3 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore(model gallery): add jina-reranker-v1-tiny-en-gguf Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(gguf-parser): recover from potential panics that can happen while reading ggufs with gguf-parser Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Use reranker from llama.cpp in AIO images Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Limit concurrent jobs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
207 lines
No EOL
7.6 KiB
YAML
207 lines
No EOL
7.6 KiB
YAML
---
# Builds the container image variants by calling the reusable workflow
# ./.github/workflows/image_build.yml once per matrix entry.
name: 'build container images'

# Triggered by pushes to the master branch and by any pushed tag.
on:
  push:
    branches:
      - master
    tags:
      - '*'

# Runs are grouped per ref and repository; starting a new run in a group
# cancels the run that is still in progress.
concurrency:
  group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }}
  cancel-in-progress: true

# Every job below forwards its matrix values as inputs to the called workflow.
# Keys that a matrix entry does not define (for example cuda-major-version on
# the hipblas entry) are not set anywhere in this file; what the called
# workflow receives for them depends on GitHub's expression handling and on
# that workflow's own input defaults.
jobs:
  # hipBLAS image built on the ROCm base image.
  hipblas-jobs:
    uses: ./.github/workflows/image_build.yml
    with:
      tag-latest: ${{ matrix.tag-latest }}
      tag-suffix: ${{ matrix.tag-suffix }}
      ffmpeg: ${{ matrix.ffmpeg }}
      image-type: ${{ matrix.image-type }}
      build-type: ${{ matrix.build-type }}
      cuda-major-version: ${{ matrix.cuda-major-version }}
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
      platforms: ${{ matrix.platforms }}
      runs-on: ${{ matrix.runs-on }}
      base-image: ${{ matrix.base-image }}
      grpc-base-image: ${{ matrix.grpc-base-image }}
      aio: ${{ matrix.aio }}
      makeflags: ${{ matrix.makeflags }}
      latest-image: ${{ matrix.latest-image }}
      latest-image-aio: ${{ matrix.latest-image-aio }}
    secrets:
      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
    strategy:
      # At most two matrix entries of this job run at the same time.
      max-parallel: 2
      matrix:
        include:
          - build-type: 'hipblas'
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-hipblas'
            ffmpeg: 'true'
            image-type: 'core'
            base-image: "rocm/dev-ubuntu-22.04:6.1"
            grpc-base-image: "ubuntu:22.04"
            runs-on: 'arc-runner-set'
            makeflags: "--jobs=3 --output-sync=target"
            latest-image: 'latest-gpu-hipblas'
            aio: "-aio-gpu-hipblas"
            latest-image-aio: 'latest-aio-gpu-hipblas'

  # CPU, CUDA 11, CUDA 12, Vulkan and SYCL (f16/f32) images, all on the
  # 'arc-runner-set' runners.
  core-image-build:
    uses: ./.github/workflows/image_build.yml
    with:
      tag-latest: ${{ matrix.tag-latest }}
      tag-suffix: ${{ matrix.tag-suffix }}
      ffmpeg: ${{ matrix.ffmpeg }}
      image-type: ${{ matrix.image-type }}
      build-type: ${{ matrix.build-type }}
      cuda-major-version: ${{ matrix.cuda-major-version }}
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
      platforms: ${{ matrix.platforms }}
      runs-on: ${{ matrix.runs-on }}
      aio: ${{ matrix.aio }}
      base-image: ${{ matrix.base-image }}
      grpc-base-image: ${{ matrix.grpc-base-image }}
      makeflags: ${{ matrix.makeflags }}
      latest-image: ${{ matrix.latest-image }}
      latest-image-aio: ${{ matrix.latest-image-aio }}
      skip-drivers: ${{ matrix.skip-drivers }}
    secrets:
      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
    strategy:
      # 2 for every event except 'pull_request', 4 for 'pull_request'.
      # NOTE(review): the only trigger declared in this file is 'push', so the
      # 'pull_request' branch looks unreachable from here - confirm before
      # simplifying.
      max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
      matrix:
        include:
          # CPU-only image; the only entry built for both amd64 and arm64 and
          # the only one with tag-latest set to 'auto'.
          - build-type: ''
            platforms: 'linux/amd64,linux/arm64'
            tag-latest: 'auto'
            tag-suffix: ''
            ffmpeg: 'true'
            image-type: 'core'
            base-image: "ubuntu:22.04"
            runs-on: 'arc-runner-set'
            aio: "-aio-cpu"
            latest-image: 'latest-cpu'
            latest-image-aio: 'latest-aio-cpu'
            makeflags: "--jobs=4 --output-sync=target"
            skip-drivers: 'false'
          # cuBLAS, CUDA 11.7.
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda11'
            ffmpeg: 'true'
            image-type: 'core'
            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=4 --output-sync=target"
            skip-drivers: 'false'
            latest-image: 'latest-gpu-nvidia-cuda-11'
            aio: "-aio-gpu-nvidia-cuda-11"
            latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
          # cuBLAS, CUDA 12.0.
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12'
            ffmpeg: 'true'
            image-type: 'core'
            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            makeflags: "--jobs=4 --output-sync=target"
            latest-image: 'latest-gpu-nvidia-cuda-12'
            aio: "-aio-gpu-nvidia-cuda-12"
            latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
          # Vulkan.
          - build-type: 'vulkan'
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-vulkan'
            ffmpeg: 'true'
            image-type: 'core'
            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
            skip-drivers: 'false'
            makeflags: "--jobs=4 --output-sync=target"
            latest-image: 'latest-gpu-vulkan'
            aio: "-aio-gpu-vulkan"
            latest-image-aio: 'latest-aio-gpu-vulkan'
          # SYCL f16 on the Intel oneAPI base image. This entry does not
          # define skip-drivers.
          - build-type: 'sycl_f16'
            platforms: 'linux/amd64'
            tag-latest: 'false'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: '-sycl-f16'
            ffmpeg: 'true'
            image-type: 'core'
            runs-on: 'arc-runner-set'
            makeflags: "--jobs=3 --output-sync=target"
            latest-image: 'latest-gpu-intel-f16'
            aio: "-aio-gpu-intel-f16"
            latest-image-aio: 'latest-aio-gpu-intel-f16'
          # SYCL f32 on the Intel oneAPI base image. This entry does not
          # define skip-drivers.
          - build-type: 'sycl_f32'
            platforms: 'linux/amd64'
            tag-latest: 'false'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: '-sycl-f32'
            ffmpeg: 'true'
            image-type: 'core'
            runs-on: 'arc-runner-set'
            makeflags: "--jobs=3 --output-sync=target"
            latest-image: 'latest-gpu-intel-f32'
            aio: "-aio-gpu-intel-f32"
            latest-image-aio: 'latest-aio-gpu-intel-f32'

  # arm64 image on the NVIDIA L4T JetPack base image. Unlike the jobs above it
  # targets the 'ubuntu-24.04-arm' runner and sets no max-parallel.
  gh-runner:
    uses: ./.github/workflows/image_build.yml
    with:
      tag-latest: ${{ matrix.tag-latest }}
      tag-suffix: ${{ matrix.tag-suffix }}
      ffmpeg: ${{ matrix.ffmpeg }}
      image-type: ${{ matrix.image-type }}
      build-type: ${{ matrix.build-type }}
      cuda-major-version: ${{ matrix.cuda-major-version }}
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
      platforms: ${{ matrix.platforms }}
      runs-on: ${{ matrix.runs-on }}
      aio: ${{ matrix.aio }}
      base-image: ${{ matrix.base-image }}
      grpc-base-image: ${{ matrix.grpc-base-image }}
      makeflags: ${{ matrix.makeflags }}
      latest-image: ${{ matrix.latest-image }}
      latest-image-aio: ${{ matrix.latest-image-aio }}
      skip-drivers: ${{ matrix.skip-drivers }}
    secrets:
      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
    strategy:
      matrix:
        include:
          # This entry defines neither aio, latest-image-aio nor
          # grpc-base-image; it is the only entry with skip-drivers 'true'.
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
            platforms: 'linux/arm64'
            tag-latest: 'false'
            tag-suffix: '-nvidia-l4t-arm64'
            latest-image: 'latest-nvidia-l4t-arm64'
            ffmpeg: 'true'
            image-type: 'core'
            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
            runs-on: 'ubuntu-24.04-arm'
            makeflags: "--jobs=4 --output-sync=target"
            skip-drivers: 'true'