feat: Add backend gallery (#5607)

* feat: Add backend gallery This PR adds support for managing backends similarly to models. A backend gallery is now available, which can be used to install and remove extra backends. The backend gallery can be configured similarly to a model gallery, and API calls allow installing and removing backends at runtime, as well as during the startup phase of LocalAI. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add backends docs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * wip: Backend Dockerfile for python backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat: drop extras images, build python backends separately Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixup on all backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * test CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Tweaks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop old backends leftovers Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixup CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Move dockerfile upper Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fix proto Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Feature dropped for consistency - we prefer model galleries Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add missing packages in the build image Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * exllama is only available on cublas Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * pin torch on chatterbox Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixups to index Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Debug CI * Install accelerators deps Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add target arch * Add cuda minor version Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Use self-hosted runners Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 
* ci: use quay for test images Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups for vllm and chatterbox Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Small fixups on CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chatterbox is only available for nvidia Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Simplify CI builds Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Adapt test, use qwen3 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore(model gallery): add jina-reranker-v1-tiny-en-gguf Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(gguf-parser): recover from potential panics that can happen while reading ggufs with gguf-parser Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Use reranker from llama.cpp in AIO images Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Limit concurrent jobs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-17 00:05:00 +00:00 · 2025-06-15 14:56:52 +02:00 · 2025-06-15 14:56:52 +02:00 · 2d64269763
commit 2d64269763
parent a7a6020328
114 changed files with 3996 additions and 1382 deletions
--- a/backend/Dockerfile.python
+++ b/backend/Dockerfile.python
@ -0,0 +1,123 @@
+# Base image for the builder stage; defaults to Ubuntu 22.04.
+ARG BASE_IMAGE=ubuntu:22.04
+
+FROM ${BASE_IMAGE} AS builder
+# Name of the directory under python/ that gets copied in and built.
+ARG BACKEND=rerankers
+# Acceleration flavour. The steps below check for "vulkan", "cublas",
+# "clblas" and "hipblas"; any other value installs no extra drivers.
+ARG BUILD_TYPE
+ENV BUILD_TYPE=${BUILD_TYPE}
+# CUDA toolkit version, used to compose the CUDA package names for cublas builds.
+ARG CUDA_MAJOR_VERSION
+ARG CUDA_MINOR_VERSION
+# Driver/SDK installation steps only run while this is exactly "false".
+ARG SKIP_DRIVERS=false
+ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}
+ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
+ENV DEBIAN_FRONTEND=noninteractive
+# TARGETARCH picks the amd64/arm64 CUDA keyring download.
+# TARGETVARIANT is declared but not referenced anywhere else in this file.
+ARG TARGETARCH
+ARG TARGETVARIANT
+
+# Base toolchain for building a Python backend.
+# One package per line, sorted alphabetically, so that duplicates (curl used
+# to be listed twice) are easy to spot in review.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        ca-certificates \
+        ccache \
+        curl \
+        espeak-ng \
+        git \
+        git-lfs \
+        libssl-dev \
+        llvm \
+        make \
+        python-is-python3 \
+        python3-dev \
+        python3-pip \
+        python3-venv \
+        unzip \
+        upx-ucl && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
+    pip install --upgrade pip
+
+
+# Cuda
+ENV PATH=/usr/local/cuda/bin:${PATH}
+
+# HipBLAS requirements
+ENV PATH=/opt/rocm/bin:${PATH}
+
+# Vulkan requirements
+# Installs the LunarG Vulkan SDK when BUILD_TYPE=vulkan and drivers are not skipped.
+# The repository signing key is written to /etc/apt/trusted.gpg.d/ instead of
+# being imported with the deprecated `apt-key add`.
+RUN <<EOT bash
+    if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
+        apt-get update && \
+        apt-get install -y  --no-install-recommends \
+            software-properties-common pciutils wget gpg-agent && \
+        wget -qO /etc/apt/trusted.gpg.d/lunarg.asc https://packages.lunarg.com/lunarg-signing-key-pub.asc && \
+        wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
+        apt-get update && \
+        apt-get install -y \
+            vulkan-sdk && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/*
+    fi
+EOT
+
+# CuBLAS requirements
+# Installs the NVIDIA CUDA keyring and the CUDA compiler/dev libraries for the
+# requested CUDA_MAJOR_VERSION/CUDA_MINOR_VERSION when BUILD_TYPE=cublas and
+# drivers are not skipped.
+# `set -e` with one command per line makes the step stop at the first failure;
+# only build-time environment variables are referenced inside the here-document.
+RUN <<EOT bash
+    set -e
+    if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
+        apt-get update
+        apt-get install -y --no-install-recommends \
+            software-properties-common pciutils
+        # -f turns an HTTP error into a failure instead of saving the error page as the .deb
+        if [ "amd64" = "$TARGETARCH" ]; then
+            curl -fsSLO https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
+        elif [ "arm64" = "$TARGETARCH" ]; then
+            curl -fsSLO https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
+        else
+            echo "unsupported TARGETARCH for the CUDA keyring: $TARGETARCH" >&2
+            exit 1
+        fi
+        dpkg -i cuda-keyring_1.1-1_all.deb
+        rm -f cuda-keyring_1.1-1_all.deb
+        apt-get update
+        apt-get install -y --no-install-recommends \
+            cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
+        apt-get clean
+        rm -rf /var/lib/apt/lists/*
+    fi
+EOT
+
+# CLBlast requirements
+# When building with clblas support, the build needs the libclblast development library.
+RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
+        apt-get update && \
+        apt-get install -y --no-install-recommends \
+            libclblast-dev && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/* \
+    ; fi
+
+# HipBLAS requirements
+# Installs hipblas-dev and rocblas-dev when BUILD_TYPE=hipblas and drivers are not skipped.
+RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
+        apt-get update && \
+        apt-get install -y --no-install-recommends \
+            hipblas-dev \
+            rocblas-dev && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/* && \
+        # For reasons unknown, the ROCm lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
+        # to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
+        ldconfig \
+    ; fi
+# Install uv as a system package.
+# The installer is saved to a file before it is executed: with `curl ... | sh`
+# a failed download hands sh an empty script, sh exits 0, and the layer
+# "succeeds" without uv being installed.
+RUN curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh && \
+    UV_INSTALL_DIR=/usr/bin sh /tmp/uv-install.sh && \
+    rm -f /tmp/uv-install.sh
+
+# Install the Rust toolchain with rustup (same download-then-run pattern) and
+# expose /root/.cargo/bin on PATH for the following steps.
+ENV PATH="/root/.cargo/bin:${PATH}"
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh && \
+    sh /tmp/rustup-init.sh -y && \
+    rm -f /tmp/rustup-init.sh
+
+# Install grpcio-tools (the version in 22.04 is too old)
+RUN pip install --user grpcio-tools==1.71.0 grpcio==1.71.0
+
+# Stage the selected backend together with the shared proto definition and the
+# common helper directory, so everything it needs lives under /${BACKEND}.
+COPY python/${BACKEND} /${BACKEND}
+COPY backend.proto /${BACKEND}/backend.proto
+COPY python/common/ /${BACKEND}/common
+
+# Run the backend's default make target inside its directory.
+RUN cd /${BACKEND} && make
+
+# Final image: starts from scratch and receives only the contents of
+# /${BACKEND} from the builder stage, copied to the image root.
+FROM scratch
+# An ARG is scoped to the stage that declares it, so BACKEND is declared again here.
+ARG BACKEND=rerankers
+COPY --from=builder /${BACKEND}/ /
--- a/backend/index.yaml
+++ b/backend/index.yaml
@ -0,0 +1,295 @@
+- name: "cuda11-rerankers"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-rerankers"
+  alias: "cuda11-rerankers"
+
+- name: "cuda11-vllm"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-vllm"
+  alias: "cuda11-vllm"
+
+- name: "cuda11-transformers"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-transformers"
+  alias: "cuda11-transformers"
+
+- name: "cuda11-diffusers"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-diffusers"
+  alias: "cuda11-diffusers"
+
+- name: "cuda11-exllama2"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-exllama2"
+  alias: "cuda11-exllama2"
+
+- name: "cuda12-rerankers"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-rerankers"
+  alias: "cuda12-rerankers"
+
+- name: "cuda12-vllm"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-vllm"
+  alias: "cuda12-vllm"
+
+- name: "cuda12-transformers"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-transformers"
+  alias: "cuda12-transformers"
+
+- name: "cuda12-diffusers"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-diffusers"
+  alias: "cuda12-diffusers"
+
+- name: "cuda12-exllama2"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-exllama2"
+  alias: "cuda12-exllama2"
+
+- name: "rocm-rerankers"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-rerankers"
+  alias: "rocm-rerankers"
+
+- name: "rocm-vllm"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-vllm"
+  alias: "rocm-vllm"
+
+- name: "rocm-transformers"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-transformers"
+  alias: "rocm-transformers"
+
+- name: "rocm-diffusers"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-diffusers"
+  alias: "rocm-diffusers"
+
+- name: "intel-sycl-f32-rerankers"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-rerankers"
+  alias: "intel-sycl-f32-rerankers"
+
+- name: "intel-sycl-f16-rerankers"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-rerankers"
+  alias: "intel-sycl-f16-rerankers"
+
+- name: "intel-sycl-f32-vllm"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-vllm"
+  alias: "intel-sycl-f32-vllm"
+
+- name: "intel-sycl-f16-vllm"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-vllm"
+  alias: "intel-sycl-f16-vllm"
+
+- name: "intel-sycl-f32-transformers"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-transformers"
+  alias: "intel-sycl-f32-transformers"
+
+- name: "intel-sycl-f16-transformers"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-transformers"
+  alias: "intel-sycl-f16-transformers"
+
+- name: "intel-sycl-f32-diffusers"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-diffusers"
+  alias: "intel-sycl-f32-diffusers"
+
+- name: "cuda11-rerankers-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-rerankers"
+  alias: "rerankers"
+
+- name: "cuda11-vllm-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-vllm"
+  alias: "vllm"
+
+- name: "cuda11-transformers-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-transformers"
+  alias: "transformers"
+
+- name: "cuda11-diffusers-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-diffusers"
+  alias: "diffusers"
+
+- name: "cuda11-exllama2-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-exllama2"
+  alias: "exllama2"
+
+- name: "cuda12-rerankers-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-rerankers"
+  alias: "rerankers"
+
+- name: "cuda12-vllm-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-vllm"
+  alias: "vllm"
+
+- name: "cuda12-transformers-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-transformers"
+  alias: "transformers"
+
+- name: "cuda12-diffusers-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-diffusers"
+  alias: "diffusers"
+
+- name: "cuda12-exllama2-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-exllama2"
+  alias: "exllama2"
+
+- name: "rocm-rerankers-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-rerankers"
+  alias: "rerankers"
+
+- name: "rocm-vllm-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-vllm"
+  alias: "vllm"
+
+- name: "rocm-transformers-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-transformers"
+  alias: "transformers"
+
+- name: "rocm-diffusers-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-diffusers"
+  alias: "diffusers"
+
+- name: "intel-sycl-f32-rerankers-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-rerankers"
+  alias: "rerankers"
+
+- name: "intel-sycl-f16-rerankers-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-rerankers"
+  alias: "rerankers"
+
+- name: "intel-sycl-f32-vllm-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-vllm"
+  alias: "vllm"
+
+- name: "intel-sycl-f16-vllm-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-vllm"
+  alias: "vllm"
+
+- name: "intel-sycl-f32-transformers-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-transformers"
+  alias: "transformers"
+
+- name: "intel-sycl-f16-transformers-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-transformers"
+  alias: "transformers"
+
+- name: "intel-sycl-f32-diffusers-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-diffusers"
+  alias: "diffusers"
+
+- name: "cuda11-kokoro-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-kokoro"
+  alias: "kokoro"
+
+- name: "cuda12-kokoro-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-kokoro"
+  alias: "kokoro"
+
+- name: "rocm-kokoro-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-kokoro"
+  alias: "kokoro"
+
+- name: "sycl-f32-kokoro"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-kokoro"
+  alias: "kokoro"
+
+- name: "sycl-f16-kokoro"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-kokoro"
+  alias: "kokoro"
+
+- name: "sycl-f16-kokoro-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-kokoro"
+  alias: "kokoro"
+
+- name: "sycl-f32-kokoro-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-kokoro"
+  alias: "kokoro"
+
+- name: "cuda11-faster-whisper-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-faster-whisper"
+  alias: "faster-whisper"
+
+- name: "cuda12-faster-whisper-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-faster-whisper"
+  alias: "faster-whisper"
+
+- name: "rocm-faster-whisper-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-faster-whisper"
+  alias: "faster-whisper"
+
+- name: "sycl-f32-faster-whisper"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-faster-whisper"
+  alias: "faster-whisper"
+
+- name: "sycl-f16-faster-whisper"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-faster-whisper"
+  alias: "faster-whisper"
+
+- name: "sycl-f32-faster-whisper-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-faster-whisper"
+  alias: "faster-whisper"
+
+- name: "sycl-f16-faster-whisper-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-faster-whisper"
+  alias: "faster-whisper"
+
+- name: "cuda11-coqui-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-coqui"
+  alias: "coqui"
+
+- name: "cuda12-coqui-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-coqui"
+  alias: "coqui"
+
+- name: "rocm-coqui-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-coqui"
+  alias: "coqui"
+
+- name: "sycl-f32-coqui"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-coqui"
+  alias: "coqui"
+
+- name: "sycl-f16-coqui"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-coqui"
+  alias: "coqui"
+
+- name: "sycl-f32-coqui-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-coqui"
+  alias: "coqui"
+
+- name: "sycl-f16-coqui-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-coqui"
+  alias: "coqui"
+
+- name: "cuda11-bark-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-bark"
+  alias: "bark"
+
+- name: "cuda12-bark-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-bark"
+  alias: "bark"
+
+- name: "rocm-bark-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-bark"
+  alias: "bark"
+
+- name: "sycl-f32-bark"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-bark"
+  alias: "bark"
+
+- name: "sycl-f16-bark"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-bark"
+  alias: "bark"
+
+- name: "sycl-f32-bark-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-bark"
+  alias: "bark"
+
+- name: "sycl-f16-bark-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-bark"
+  alias: "bark"
+
+- name: "cuda11-chatterbox-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-chatterbox"
+  alias: "chatterbox"
+
+- name: "cuda12-chatterbox-master"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-chatterbox"
+  alias: "chatterbox"
+
+- name: "cuda11-chatterbox"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-chatterbox"
+  alias: "chatterbox"
+
+- name: "cuda12-chatterbox"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-chatterbox"
+  alias: "chatterbox"
--- a/backend/python/bark/Makefile
+++ b/backend/python/bark/Makefile
@ -22,7 +22,7 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py

 backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+	python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto

 .PHONY: clean
 clean: protogen-clean
--- a/backend/python/bark/install.sh
+++ b/backend/python/bark/install.sh
@ -1,7 +1,12 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 # This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
 # This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
--- a/backend/python/bark/run.sh
+++ b/backend/python/bark/run.sh
@ -1,4 +1,9 @@
 #!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 startBackend $@
--- a/backend/python/bark/test.sh
+++ b/backend/python/bark/test.sh
@ -1,6 +1,11 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 runUnittests
--- a/backend/python/chatterbox/Makefile
+++ b/backend/python/chatterbox/Makefile
@ -22,7 +22,7 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py

 backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+	python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto

 .PHONY: clean
 clean: protogen-clean
--- a/backend/python/chatterbox/install.sh
+++ b/backend/python/chatterbox/install.sh
@ -1,7 +1,12 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 # This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
 # This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
--- a/backend/python/chatterbox/requirements-hipblas.txt
+++ b/backend/python/chatterbox/requirements-hipblas.txt
@ -1,6 +1,7 @
---extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch==2.6.0+rocm6.0
-torchaudio==2.6.0+rocm6.0
+# The wheel index must match the +rocmX.Y local version tag of the pins below.
+--extra-index-url https://download.pytorch.org/whl/rocm6.1
+torch==2.6.0+rocm6.1
+torchaudio==2.6.0+rocm6.1
 transformers==4.46.3
 chatterbox-tts
 accelerate
--- a/backend/python/chatterbox/requirements-intel.txt
+++ b/backend/python/chatterbox/requirements-intel.txt
@ -8,5 +8,4 @@ accelerate
 oneccl_bind_pt==2.3.100+xpu
 optimum[openvino]
 setuptools
-transformers==4.48.3
 accelerate
--- a/backend/python/chatterbox/run.sh
+++ b/backend/python/chatterbox/run.sh
@ -1,4 +1,9 @@
 #!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 startBackend $@
--- a/backend/python/chatterbox/test.sh
+++ b/backend/python/chatterbox/test.sh
@ -1,6 +1,11 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 runUnittests
--- a/backend/python/common/template/install.sh
+++ b/backend/python/common/template/install.sh
@ -1,7 +1,12 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 # This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
 # This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
--- a/backend/python/common/template/protogen.sh
+++ b/backend/python/common/template/protogen.sh
@ -1,6 +1,11 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

-python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto
--- a/backend/python/common/template/run.sh
+++ b/backend/python/common/template/run.sh
@ -1,4 +1,9 @@
 #!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 startBackend $@
--- a/backend/python/common/template/test.sh
+++ b/backend/python/common/template/test.sh
@ -1,6 +1,11 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 runUnittests
--- a/backend/python/coqui/Makefile
+++ b/backend/python/coqui/Makefile
@ -22,7 +22,7 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py

 backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+	python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto

 .PHONY: clean
 clean: protogen-clean
--- a/backend/python/coqui/install.sh
+++ b/backend/python/coqui/install.sh
@ -1,7 +1,12 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 # This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
 # This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
--- a/backend/python/coqui/run.sh
+++ b/backend/python/coqui/run.sh
@ -1,4 +1,9 @@
 #!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 startBackend $@
--- a/backend/python/coqui/test.sh
+++ b/backend/python/coqui/test.sh
@ -1,6 +1,11 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 runUnittests
--- a/backend/python/diffusers/Makefile
+++ b/backend/python/diffusers/Makefile
@ -32,7 +32,7 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py

 backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+	python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto

 .PHONY: clean
 clean: protogen-clean
--- a/backend/python/diffusers/install.sh
+++ b/backend/python/diffusers/install.sh
@ -1,7 +1,12 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 # This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
 # This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
--- a/backend/python/diffusers/run.sh
+++ b/backend/python/diffusers/run.sh
@ -1,4 +1,9 @@
 #!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 startBackend $@
--- a/backend/python/diffusers/test.sh
+++ b/backend/python/diffusers/test.sh
@ -1,6 +1,11 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 runUnittests
--- a/backend/python/exllama2/Makefile
+++ b/backend/python/exllama2/Makefile
@ -16,7 +16,7 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py

 backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+	python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto

 .PHONY: clean
 clean: protogen-clean
--- a/backend/python/exllama2/install.sh
+++ b/backend/python/exllama2/install.sh
@ -5,7 +5,12 @@ LIMIT_TARGETS="cublas"
 EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"
 EXLLAMA2_VERSION=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f

-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 installRequirements

--- a/backend/python/exllama2/run.sh
+++ b/backend/python/exllama2/run.sh
@ -1,6 +1,11 @@
 #!/bin/bash
 LIMIT_TARGETS="cublas"

-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 startBackend $@
--- a/backend/python/exllama2/test.sh
+++ b/backend/python/exllama2/test.sh
@ -1,6 +1,11 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 runUnittests
--- a/backend/python/faster-whisper/install.sh
+++ b/backend/python/faster-whisper/install.sh
@ -1,7 +1,12 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 # This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
 # This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
--- a/backend/python/faster-whisper/protogen.sh
+++ b/backend/python/faster-whisper/protogen.sh
@ -1,6 +1,11 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

-python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto
--- a/backend/python/faster-whisper/run.sh
+++ b/backend/python/faster-whisper/run.sh
@ -1,4 +1,9 @@
 #!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 startBackend $@
--- a/backend/python/faster-whisper/test.sh
+++ b/backend/python/faster-whisper/test.sh
@ -1,6 +1,11 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 runUnittests
--- a/backend/python/kokoro/install.sh
+++ b/backend/python/kokoro/install.sh
@ -1,7 +1,12 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

 # This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
 # This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
--- a/backend/python/kokoro/protogen.sh
+++ b/backend/python/kokoro/protogen.sh
@ -1,6 +1,11 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi

-python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto
--- a/backend/python/kokoro/run.sh
+++ b/backend/python/kokoro/run.sh
@ -1,4 +1,9 @@
 #!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
+backend_dir="$(dirname "$0")"  # quoted so install paths containing spaces survive word splitting
+if [ -d "$backend_dir/common" ]; then  # prefer a common/ directory located next to this script
+    source "$backend_dir/common/libbackend.sh"
+else  # otherwise fall back to the shared ../common directory
+    source "$backend_dir/../common/libbackend.sh"
+fi

 startBackend $@
--- a/backend/python/kokoro/test.sh
+++ b/backend/python/kokoro/test.sh
@ -1,6 +1,11 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir="$(dirname "$0")"  # quoted so install paths containing spaces survive word splitting
+if [ -d "$backend_dir/common" ]; then  # prefer a common/ directory located next to this script
+    source "$backend_dir/common/libbackend.sh"
+else  # otherwise fall back to the shared ../common directory
+    source "$backend_dir/../common/libbackend.sh"
+fi

 runUnittests
--- a/backend/python/rerankers/Makefile
+++ b/backend/python/rerankers/Makefile
@ -23,7 +23,7 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py

 backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+	python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto

 .PHONY: clean
 clean: protogen-clean
--- a/backend/python/rerankers/install.sh
+++ b/backend/python/rerankers/install.sh
@ -1,7 +1,13 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir="$(dirname "$0")"  # quoted so install paths containing spaces survive word splitting
+
+if [ -d "$backend_dir/common" ]; then  # prefer a common/ directory located next to this script
+    source "$backend_dir/common/libbackend.sh"
+else  # otherwise fall back to the shared ../common directory
+    source "$backend_dir/../common/libbackend.sh"
+fi

 # This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
 # This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
--- a/backend/python/rerankers/run.sh
+++ b/backend/python/rerankers/run.sh
@ -1,4 +1,10 @@
 #!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
+
+backend_dir="$(dirname "$0")"  # quoted so install paths containing spaces survive word splitting
+if [ -d "$backend_dir/common" ]; then  # prefer a common/ directory located next to this script
+    source "$backend_dir/common/libbackend.sh"
+else  # otherwise fall back to the shared ../common directory
+    source "$backend_dir/../common/libbackend.sh"
+fi

 startBackend $@
--- a/backend/python/rerankers/test.sh
+++ b/backend/python/rerankers/test.sh
@ -1,6 +1,11 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir="$(dirname "$0")"  # quoted so install paths containing spaces survive word splitting
+if [ -d "$backend_dir/common" ]; then  # prefer a common/ directory located next to this script
+    source "$backend_dir/common/libbackend.sh"
+else  # otherwise fall back to the shared ../common directory
+    source "$backend_dir/../common/libbackend.sh"
+fi

 runUnittests
--- a/backend/python/transformers/Makefile
+++ b/backend/python/transformers/Makefile
@ -23,7 +23,7 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py

 backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+	python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto

 .PHONY: clean
 clean: protogen-clean
--- a/backend/python/transformers/install.sh
+++ b/backend/python/transformers/install.sh
@ -1,7 +1,12 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir="$(dirname "$0")"  # quoted so install paths containing spaces survive word splitting
+if [ -d "$backend_dir/common" ]; then  # prefer a common/ directory located next to this script
+    source "$backend_dir/common/libbackend.sh"
+else  # otherwise fall back to the shared ../common directory
+    source "$backend_dir/../common/libbackend.sh"
+fi

 # This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
 # This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
--- a/backend/python/transformers/run.sh
+++ b/backend/python/transformers/run.sh
@ -1,5 +1,10 @@
 #!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
+backend_dir="$(dirname "$0")"  # quoted so install paths containing spaces survive word splitting
+if [ -d "$backend_dir/common" ]; then  # prefer a common/ directory located next to this script
+    source "$backend_dir/common/libbackend.sh"
+else  # otherwise fall back to the shared ../common directory
+    source "$backend_dir/../common/libbackend.sh"
+fi

 if [ -d "/opt/intel" ]; then
    # Assumes we are using the Intel oneAPI container image
--- a/backend/python/transformers/test.sh
+++ b/backend/python/transformers/test.sh
@ -1,6 +1,11 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir="$(dirname "$0")"  # quoted so install paths containing spaces survive word splitting
+if [ -d "$backend_dir/common" ]; then  # prefer a common/ directory located next to this script
+    source "$backend_dir/common/libbackend.sh"
+else  # otherwise fall back to the shared ../common directory
+    source "$backend_dir/../common/libbackend.sh"
+fi

 runUnittests
--- a/backend/python/vllm/Makefile
+++ b/backend/python/vllm/Makefile
@ -22,7 +22,7 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py

 backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+	python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto

 .PHONY: clean
 clean: protogen-clean
--- a/backend/python/vllm/install.sh
+++ b/backend/python/vllm/install.sh
@ -3,7 +3,13 @@ set -e

 EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"

-source $(dirname $0)/../common/libbackend.sh
+backend_dir="$(dirname "$0")"  # quoted so install paths containing spaces survive word splitting
+
+if [ -d "$backend_dir/common" ]; then  # prefer a common/ directory located next to this script
+    source "$backend_dir/common/libbackend.sh"
+else  # otherwise fall back to the shared ../common directory
+    source "$backend_dir/../common/libbackend.sh"
+fi

 # This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
 # This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
--- a/backend/python/vllm/requirements-hipblas.txt
+++ b/backend/python/vllm/requirements-hipblas.txt
@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
+--extra-index-url https://download.pytorch.org/whl/nightly/rocm6.3
 accelerate
-torch==2.7.0+rocm6.3
+torch
 transformers
 bitsandbytes
--- a/backend/python/vllm/run.sh
+++ b/backend/python/vllm/run.sh
@ -1,4 +1,11 @@
 #!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
+
+backend_dir="$(dirname "$0")"  # quoted so install paths containing spaces survive word splitting
+
+if [ -d "$backend_dir/common" ]; then  # prefer a common/ directory located next to this script
+    source "$backend_dir/common/libbackend.sh"
+else  # otherwise fall back to the shared ../common directory
+    source "$backend_dir/../common/libbackend.sh"
+fi

 startBackend $@
--- a/backend/python/vllm/test.sh
+++ b/backend/python/vllm/test.sh
@ -1,6 +1,12 @@
 #!/bin/bash
 set -e

-source $(dirname $0)/../common/libbackend.sh
+backend_dir="$(dirname "$0")"  # quoted so install paths containing spaces survive word splitting
+
+if [ -d "$backend_dir/common" ]; then  # prefer a common/ directory located next to this script
+    source "$backend_dir/common/libbackend.sh"
+else  # otherwise fall back to the shared ../common directory
+    source "$backend_dir/../common/libbackend.sh"
+fi

 runUnittests